// Package spell provides a lightweight, pure-Go spellchecker for glint: an // embedded common-English wordlist for membership tests, a BK-tree for // edit-distance suggestions, and a hand-editable personal dictionary. It carries // no cgo dependency so `go build .` and the Homebrew formula stay clean. package spell import ( "bufio" "bytes" "compress/gzip" _ "embed" "strings" ) //go:embed words.txt.gz var wordsGz []byte // Dict is a loaded spellchecker: an embedded common-word set plus a personal // dictionary, with a BK-tree over the embedded words for suggestions. type Dict struct { words map[string]struct{} // embedded words, lowercased ranked []string // embedded words in descending frequency order rank map[string]int // word -> frequency rank (0 = most common) personal map[string]struct{} // personal-dictionary words, lowercased bk *bkTree // suggestion index over embedded words personalPath string // ~/.config/glint/dict.txt; "" until SetPersonalPath } // Load reads the embedded wordlist into a Dict. The personal dictionary is not // loaded here; call LoadPersonal once the config path is known. func Load() (*Dict, error) { zr, err := gzip.NewReader(bytes.NewReader(wordsGz)) if err != nil { return nil, err } defer func() { _ = zr.Close() }() d := &Dict{ words: make(map[string]struct{}, 60000), rank: make(map[string]int, 60000), personal: make(map[string]struct{}), } sc := bufio.NewScanner(zr) for sc.Scan() { w := strings.TrimSpace(sc.Text()) if w == "" { continue } if _, dup := d.words[w]; dup { continue } d.rank[w] = len(d.ranked) d.ranked = append(d.ranked, w) d.words[w] = struct{}{} } if err := sc.Err(); err != nil { return nil, err } d.bk = buildBKTree(d.ranked) return d, nil } // Known reports whether word is spelled correctly: a direct (case-insensitive) // hit in either dictionary, or a lenient match after stripping a trailing // possessive ('s) or a simple plural/inflection so "editor's" and "editors" // ride on "editor". func (d *Dict) Known(word string) bool { w := strings.ToLower(strings.TrimSpace(word)) if w == "" { return true } if d.has(w) { return true } // Possessive: trim a trailing 's or ' and re-check the base. if base, ok := trimPossessive(w); ok && d.has(base) { return true } return false } // has is a direct membership test across the embedded and personal sets. func (d *Dict) has(w string) bool { if _, ok := d.words[w]; ok { return true } _, ok := d.personal[w] return ok } // trimPossessive removes a trailing "'s" or "'" (straight or curly apostrophe), // returning the base word and whether a trim happened. func trimPossessive(w string) (string, bool) { for _, suf := range []string{"'s", "’s", "'", "’"} { if strings.HasSuffix(w, suf) { return strings.TrimSuffix(w, suf), true } } return w, false }