feat: spell package — embedded dict, BK-tree suggestions, personal dict (TASK-020)
0b443645b0edf12ff598973f0dfc590bd775da05
Kevin Kortum <kevinkortum@me.com> · 2026-06-29 18:14
parent 8e2d5c2b
feat: spell package — embedded dict, BK-tree suggestions, personal dict (TASK-020) Pure-Go, zero-cgo spellchecker foundation for inline spellcheck: - 60k common-English wordlist (Norvig web-frequency ∩ curated words_alpha, frequency-ranked), gzip-embedded (~242KB) and decompressed into a hashset at startup. The curation rejects common misspellings (recieve, teh, definately) that a raw frequency list would accept. - Known(): case-insensitive membership with lenient possessive handling. - Suggest(): BK-tree (Levenshtein metric, radius 2) re-ranked by OSA distance so transpositions like teh->the score as one edit, tie-broken by word frequency. - Personal dictionary at ~/.config/glint/dict.txt: hand-editable, Add() appends + updates in memory, missing file tolerated. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01KjNrdAWUdkaxFyGdrPHaBj
8 files changed
- → Spellcheck-with-undercurl-underlines.md +23 −1
@@ -4,7 +4,7 @@ title: Spellcheck with undercurl underlines
status: "\U0001F7E2 In progress"
assignee: []
created_date: '2026-06-29 18:11'
-updated_date: '2026-06-30 00:21'
+updated_date: '2026-06-30 01:14'
labels:
- feature
- release-1
@@ -58,4 +58,26 @@ - [ ] #3 Code fences, inline code, URLs, wikilinks, link targets, and frontmatter values are never flagged
- [ ] #4 Adding the word under the cursor appends to ~/.config/glint/dict.txt and removes the underline live; hand-editing dict.txt also works
- [ ] #5 Spellcheck defaults ON for md/txt/unnamed buffers and OFF for code files, with a config override and a session toggle
- [ ] #6 Triggering on a flagged word shows ~5 ranked suggestions; picking one replaces the word in place, marks the buffer dirty, and clears the underline
+- [ ] #7 Clicking a flagged (underlined) word with the mouse opens the same suggestion popup; clicking elsewhere just moves the cursor
<!-- AC:END -->
+
+## Implementation Plan
+
+<!-- SECTION:PLAN:BEGIN -->
+Decisions: dict=common ~60k (Norvig-freq ∩ curated words_alpha, frequency-ranked, rejects common typos); key=single Alt+; popup hub (suggest/toggle/add/ignore); mouse click on flagged word also opens popup.
+
+Slices (each TDD):
+A. internal/spell: embed words.txt.gz (60k, freq order) → Known(word) case-insensitive + possessive/plural leniency.
+B. internal/spell: BK-tree from dict → Suggest(word,max) ranked by edit-distance then frequency rank.
+C. internal/spell: personal dict ~/.config/glint/dict.txt load+Add (append+in-memory), Known unions it.
+D. editor Span.Wavy + theme.Spell red color; renderSpans/renderSpansCursor emit undercurl SGR (4:3 + 58:2 color), preserve markup-visible invariant; degrade gracefully.
+E. editor spellcheck pass: mark misspelled prose words Wavy; skip code fences/inline code/URLs/wikilinks/link targets/frontmatter values/headings markup + whole-doc when codeFile!=''; viewport-only + word→ok cache invalidated on add; session toggle.
+F. app popup mode (Alt+; + mouse click on flagged word): 5 suggestions + Toggle + Add + Ignore; arrows/number+Enter apply→replace word, dirty, clear underline; Esc dismiss.
+G. config spellcheck=auto|on|off; default ON md/txt/unnamed, OFF code (share TASK-018 ext map).
+<!-- SECTION:PLAN:END -->
+
+## Implementation Notes
+
+<!-- SECTION:NOTES:BEGIN -->
+Slice A/B/C done (commit 2c0a77e9): internal/spell — 60k embedded dict (freq∩curated, rejects common typos), Known() w/ possessive leniency, BK-tree Suggest() OSA-reranked + freq tie-break, personal dict load/Add at ~/.config/glint/dict.txt. 11 tests green.
+<!-- SECTION:NOTES:END -->
internal/spell/bktree.go +184 −0
@@ -0,0 +1,184 @@
+package spell
+
+import "sort"
+
+// bkTree is a node of a Burkhard-Keller tree over the embedded wordlist: a
+// metric tree keyed by Levenshtein distance that answers "all words within edit
+// distance k" without scanning the whole dictionary. It is queried only on user
+// trigger (the suggestion popup), never per render.
+type bkTree struct {
+ word string // the word stored at this node
+ children map[int]*bkTree // subtrees keyed by their Levenshtein distance from word
+}
+
+// buildBKTree inserts words in the given order. Insertion order is descending
+// frequency, which keeps common words near the root and lets Suggest break
+// distance ties toward more frequent words cheaply.
+func buildBKTree(words []string) *bkTree {
+ if len(words) == 0 {
+ return nil
+ }
+ root := &bkTree{word: words[0], children: map[int]*bkTree{}}
+ for _, w := range words[1:] {
+ root.insert(w)
+ }
+ return root
+}
+
+// insert adds w to the tree rooted at t. It walks down the edge labelled with
+// the Levenshtein distance between w and each visited node until that edge is
+// free, then hangs a new leaf there. A word already in the tree (distance 0 to
+// some visited node) is ignored.
+func (t *bkTree) insert(w string) {
+	node := t
+	for {
+		dist := levenshtein(node.word, w)
+		if dist == 0 {
+			return // duplicate
+		}
+		next, taken := node.children[dist]
+		if !taken {
+			node.children[dist] = &bkTree{word: w, children: map[int]*bkTree{}}
+			return
+		}
+		node = next
+	}
+}
+
+// candidate is a matched word with its edit distance from the query. within
+// fills dist with the Levenshtein distance; Suggest later overwrites it with the
+// OSA distance before ranking.
+type candidate struct {
+ word string // the dictionary word that matched
+ dist int // edit distance between word and the query
+}
+
+// within appends to out every word in the subtree rooted at t whose Levenshtein
+// distance from query is at most maxDist. A nil receiver is a no-op.
+func (t *bkTree) within(query string, maxDist int, out *[]candidate) {
+	if t == nil {
+		return
+	}
+	here := levenshtein(t.word, query)
+	if here <= maxDist {
+		*out = append(*out, candidate{t.word, here})
+	}
+	// By the triangle inequality a match can only live under an edge whose
+	// label lies in [here-maxDist, here+maxDist]; skip every other subtree.
+	lo, hi := here-maxDist, here+maxDist
+	for edge, sub := range t.children {
+		if edge < lo || edge > hi {
+			continue
+		}
+		sub.within(query, maxDist, out)
+	}
+}
+
+// Suggest returns at most max corrections for word drawn from the embedded
+// wordlist. Candidates are the words within Levenshtein distance 2 of the
+// ASCII-lowercased query; they are ordered by ascending OSA distance and, for
+// equal distances, by ascending frequency rank (more common first). A word at
+// distance 0 (the query itself) is never returned. Suggest returns nil when
+// there is no suggestion index or max <= 0.
+func (d *Dict) Suggest(word string, max int) []string {
+	if max <= 0 || d.bk == nil {
+		return nil
+	}
+	query := lowerASCII(word)
+
+	// Pruning uses plain Levenshtein because it is a true metric (the triangle
+	// inequality holds). Radius 2 still reaches single transpositions, which
+	// Levenshtein counts as two edits.
+	var hits []candidate
+	d.bk.within(query, 2, &hits)
+
+	// Re-score with the transposition-aware OSA distance so "teh"->"the" counts
+	// as one edit.
+	for i := range hits {
+		hits[i].dist = osaDistance(query, hits[i].word)
+	}
+	sort.SliceStable(hits, func(a, b int) bool {
+		x, y := hits[a], hits[b]
+		if x.dist == y.dist {
+			return d.rank[x.word] < d.rank[y.word]
+		}
+		return x.dist < y.dist
+	})
+
+	suggestions := make([]string, 0, max)
+	for i := 0; i < len(hits) && len(suggestions) < max; i++ {
+		// Distance 0 means the query is itself a dictionary word.
+		if hits[i].dist != 0 {
+			suggestions = append(suggestions, hits[i].word)
+		}
+	}
+	return suggestions
+}
+
+// lowerASCII returns s with the bytes 'A'..'Z' mapped to 'a'..'z'. All other
+// bytes, including those of multi-byte UTF-8 sequences, are copied unchanged.
+func lowerASCII(s string) string {
+	out := make([]byte, len(s))
+	for i := 0; i < len(s); i++ {
+		ch := s[i]
+		if 'A' <= ch && ch <= 'Z' {
+			ch += 'a' - 'A'
+		}
+		out[i] = ch
+	}
+	return string(out)
+}
+
+// levenshtein returns the edit distance between a and b, counting insertions,
+// deletions and substitutions of single runes. It keeps only two rows of the
+// dynamic-programming table at a time.
+func levenshtein(a, b string) int {
+	src, dst := []rune(a), []rune(b)
+	switch {
+	case len(src) == 0:
+		return len(dst)
+	case len(dst) == 0:
+		return len(src)
+	}
+	width := len(dst) + 1
+	above := make([]int, width) // row for the previous rune of src
+	row := make([]int, width)   // row being filled
+	for col := range above {
+		above[col] = col
+	}
+	for i, r := range src {
+		row[0] = i + 1
+		for j, s := range dst {
+			sub := above[j]
+			if r != s {
+				sub++
+			}
+			row[j+1] = min3(above[j+1]+1, row[j]+1, sub)
+		}
+		above, row = row, above
+	}
+	// After the final swap the last completed row is in above.
+	return above[width-1]
+}
+
+// osaDistance returns the optimal string alignment distance between a and b:
+// Levenshtein plus swapping two adjacent runes at cost 1, so "teh" is one edit
+// from "the". It is used only to rank suggestion candidates, not for BK-tree
+// pruning.
+func osaDistance(a, b string) int {
+	src, dst := []rune(a), []rune(b)
+	rows, cols := len(src), len(dst)
+	if rows == 0 {
+		return cols
+	}
+	if cols == 0 {
+		return rows
+	}
+	grid := make([][]int, rows+1)
+	for i := range grid {
+		grid[i] = make([]int, cols+1)
+		grid[i][0] = i
+	}
+	for j := range grid[0] {
+		grid[0][j] = j
+	}
+	for i := 1; i <= rows; i++ {
+		for j := 1; j <= cols; j++ {
+			sub := grid[i-1][j-1]
+			if src[i-1] != dst[j-1] {
+				sub++
+			}
+			best := min3(grid[i-1][j]+1, grid[i][j-1]+1, sub)
+			// Adjacent transposition: the last two runes of each prefix are
+			// each other's mirror image.
+			swapped := i > 1 && j > 1 && src[i-1] == dst[j-2] && src[i-2] == dst[j-1]
+			if swapped && grid[i-2][j-2]+1 < best {
+				best = grid[i-2][j-2] + 1
+			}
+			grid[i][j] = best
+		}
+	}
+	return grid[rows][cols]
+}
+
+// min3 returns the smallest of its three arguments.
+func min3(a, b, c int) int {
+	smallest := a
+	for _, v := range [...]int{b, c} {
+		if v < smallest {
+			smallest = v
+		}
+	}
+	return smallest
+}
internal/spell/dict.go +101 −0
@@ -0,0 +1,101 @@
+// Package spell provides a lightweight, pure-Go spellchecker for glint: an
+// embedded common-English wordlist for membership tests, a BK-tree for
+// edit-distance suggestions, and a hand-editable personal dictionary. It carries
+// no cgo dependency so `go build .` and the Homebrew formula stay clean.
+package spell
+
+import (
+ "bufio"
+ "bytes"
+ "compress/gzip"
+ _ "embed"
+ "strings"
+)
+
+// wordsGz holds the gzip-compressed wordlist embedded at build time. Load
+// decompresses it and reads one word per line.
+//
+//go:embed words.txt.gz
+var wordsGz []byte
+
+// Dict is a loaded spellchecker: an embedded common-word set plus a personal
+// dictionary, with a BK-tree over the embedded words for suggestions.
+type Dict struct {
+ words map[string]struct{} // embedded words as read by Load (not case-folded there; the list is expected to be lowercase)
+ ranked []string // embedded words in wordlist order (descending frequency)
+ rank map[string]int // word -> index in ranked (0 = most common)
+ personal map[string]struct{} // personal-dictionary words, lowercased
+ bk *bkTree // suggestion index over embedded words
+
+ personalPath string // personal dictionary file, conventionally ~/.config/glint/dict.txt; "" until SetPersonalPath
+}
+
+// Load decompresses the embedded wordlist and builds a Dict from it: the
+// membership set, the frequency ranking (line order, first line = rank 0) and
+// the BK-tree used for suggestions. Blank lines and repeated words are skipped.
+// The personal dictionary is not read here; call SetPersonalPath and
+// LoadPersonal once the config path is known.
+func Load() (*Dict, error) {
+	gz, err := gzip.NewReader(bytes.NewReader(wordsGz))
+	if err != nil {
+		return nil, err
+	}
+	defer gz.Close()
+
+	dict := &Dict{
+		words:    make(map[string]struct{}, 60000),
+		rank:     make(map[string]int, 60000),
+		personal: make(map[string]struct{}),
+	}
+	lines := bufio.NewScanner(gz)
+	for lines.Scan() {
+		entry := strings.TrimSpace(lines.Text())
+		if _, seen := dict.words[entry]; entry == "" || seen {
+			continue
+		}
+		dict.words[entry] = struct{}{}
+		// The rank is the word's position in ranked, assigned before appending.
+		dict.rank[entry] = len(dict.ranked)
+		dict.ranked = append(dict.ranked, entry)
+	}
+	if scanErr := lines.Err(); scanErr != nil {
+		return nil, scanErr
+	}
+	dict.bk = buildBKTree(dict.ranked)
+	return dict, nil
+}
+
+// Known reports whether word is spelled correctly. The word is trimmed and
+// lowercased, then accepted if it is empty, if it is in the embedded or
+// personal dictionary, or if it ends in a possessive ('s or a bare apostrophe,
+// straight or curly) whose base is in either dictionary, so "editor's" rides on
+// "editor". No plural or other inflection stripping is performed.
+func (d *Dict) Known(word string) bool {
+	norm := strings.ToLower(strings.TrimSpace(word))
+	if norm == "" || d.has(norm) {
+		return true
+	}
+	// Possessive: drop the trailing 's or ' and look the base up instead.
+	base, trimmed := trimPossessive(norm)
+	return trimmed && d.has(base)
+}
+
+// has reports whether w is present, exactly as given, in the embedded set or
+// the personal set.
+func (d *Dict) has(w string) bool {
+	_, embedded := d.words[w]
+	_, personal := d.personal[w]
+	return embedded || personal
+}
+
+// trimPossessive strips one trailing possessive marker from w: "'s" or "'",
+// written with either a straight or a curly apostrophe. It returns the
+// remaining base and true when something was stripped, or w and false
+// otherwise.
+func trimPossessive(w string) (string, bool) {
+	// The "…s" forms come first so that "editor's" loses both characters.
+	markers := [...]string{"'s", "’s", "'", "’"}
+	for i := range markers {
+		cut := len(w) - len(markers[i])
+		if cut >= 0 && w[cut:] == markers[i] {
+			return w[:cut], true
+		}
+	}
+	return w, false
+}
internal/spell/dict_test.go +50 −0
@@ -0,0 +1,50 @@
+package spell
+
+import "testing"
+
+// loadT loads the embedded dictionary and fails the test immediately if Load
+// returns an error.
+func loadT(t *testing.T) *Dict {
+	t.Helper()
+	dict, loadErr := Load()
+	if loadErr != nil {
+		t.Fatalf("Load: %v", loadErr)
+	}
+	return dict
+}
+
+// TestKnownCommonWords checks that a sample of correctly spelled words is
+// accepted by the embedded dictionary.
+func TestKnownCommonWords(t *testing.T) {
+	dict := loadT(t)
+	common := []string{"the", "receive", "separate", "believe", "government", "markdown", "editor"}
+	for i := range common {
+		if word := common[i]; !dict.Known(word) {
+			t.Errorf("Known(%q) = false, want true", word)
+		}
+	}
+}
+
+// TestUnknownTypos checks that common misspellings and a nonsense word are
+// rejected.
+func TestUnknownTypos(t *testing.T) {
+	dict := loadT(t)
+	typos := []string{"recieve", "seperate", "definately", "teh", "qwertyx"}
+	for i := range typos {
+		if word := typos[i]; dict.Known(word) {
+			t.Errorf("Known(%q) = true, want false (typo)", word)
+		}
+	}
+}
+
+// TestKnownCaseInsensitive checks that capitalised and all-caps spellings of
+// dictionary words are accepted.
+func TestKnownCaseInsensitive(t *testing.T) {
+	dict := loadT(t)
+	mixedCase := []string{"The", "RECEIVE", "Government"}
+	for i := range mixedCase {
+		if word := mixedCase[i]; !dict.Known(word) {
+			t.Errorf("Known(%q) = false, want true (case-insensitive)", word)
+		}
+	}
+}
+
+// TestKnownPossessiveAndPlural checks that the possessive of a known word is
+// accepted even when the exact possessive form is not a dictionary entry.
+// Despite the name, only possessive forms are exercised here; no plural case is
+// asserted.
+func TestKnownPossessiveAndPlural(t *testing.T) {
+	dict := loadT(t)
+	possessives := []string{"government's", "editor's", "markdown's"}
+	for i := range possessives {
+		if word := possessives[i]; !dict.Known(word) {
+			t.Errorf("Known(%q) = false, want true (possessive of known word)", word)
+		}
+	}
+}
internal/spell/personal.go +64 −0
@@ -0,0 +1,64 @@
+package spell
+
+import (
+ "bufio"
+ "os"
+ "path/filepath"
+ "strings"
+)
+
+// SetPersonalPath stores path as the location of the hand-editable personal
+// dictionary (conventionally ~/.config/glint/dict.txt). It only records the
+// path; the file is neither read nor created.
+func (d *Dict) SetPersonalPath(path string) {
+	d.personalPath = path
+}
+
+// LoadPersonal merges the personal dictionary file into memory. The file holds
+// one word per line; blank lines and lines starting with '#' are skipped, and
+// every word is stored lowercased. With no path set, or when the file does not
+// exist, LoadPersonal does nothing and returns nil. Any other open or read
+// error is returned.
+func (d *Dict) LoadPersonal() error {
+	if d.personalPath == "" {
+		return nil
+	}
+	file, err := os.Open(d.personalPath)
+	switch {
+	case err == nil:
+		// opened; fall through to reading
+	case os.IsNotExist(err):
+		return nil // no personal dictionary yet
+	default:
+		return err
+	}
+	defer file.Close()
+
+	lines := bufio.NewScanner(file)
+	for lines.Scan() {
+		entry := strings.TrimSpace(lines.Text())
+		if entry != "" && !strings.HasPrefix(entry, "#") {
+			d.personal[strings.ToLower(entry)] = struct{}{}
+		}
+	}
+	return lines.Err()
+}
+
+// Add inserts word into the in-memory personal set and appends it to dict.txt,
+// creating the file (and any missing parent directories) if needed. The word is
+// stored lowercased so membership stays case-insensitive.
+func (d *Dict) Add(word string) error {
+ w := strings.ToLower(strings.TrimSpace(word))
+ if w == "" {
+ return nil
+ }
+ if _, ok := d.personal[w]; !ok {
+ d.personal[w] = struct{}{}
+ }
+ if d.personalPath == "" {
+ return nil
+ }
+ if err := os.MkdirAll(filepath.Dir(d.personalPath), 0o755); err != nil {
+ return err
+ }
+ f, err := os.OpenFile(d.personalPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ _, err = f.WriteString(w + "\n")
+ return err
+}
internal/spell/personal_test.go +63 −0
@@ -0,0 +1,63 @@
+package spell
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+)
+
+// TestLoadPersonalReadsFile checks that words in a hand-written dict.txt become
+// Known after LoadPersonal, case-insensitively, and that comment lines are not
+// treated as words.
+func TestLoadPersonalReadsFile(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "dict.txt")
+	contents := []byte("kubernetes\n# a comment\nflexoki\n\n")
+	if err := os.WriteFile(path, contents, 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	dict := loadT(t)
+	dict.SetPersonalPath(path)
+	if err := dict.LoadPersonal(); err != nil {
+		t.Fatalf("LoadPersonal: %v", err)
+	}
+
+	if known := dict.Known("kubernetes"); !known {
+		t.Error("hand-added word kubernetes not Known after LoadPersonal")
+	}
+	if known := dict.Known("Flexoki"); !known {
+		t.Error("personal dict should be case-insensitive")
+	}
+	if known := dict.Known("# a comment"); known {
+		t.Error("comment line should not be a word")
+	}
+}
+
+// TestLoadPersonalMissingFileIsOK checks that pointing the personal dictionary
+// at a file that does not exist is not an error.
+func TestLoadPersonalMissingFileIsOK(t *testing.T) {
+	dict := loadT(t)
+	missing := filepath.Join(t.TempDir(), "does-not-exist.txt")
+	dict.SetPersonalPath(missing)
+	err := dict.LoadPersonal()
+	if err != nil {
+		t.Errorf("LoadPersonal on missing file = %v, want nil", err)
+	}
+}
+
+// TestAddAppendsAndPersists checks that Add makes a word Known immediately,
+// creates the missing parent directory, and writes the word to dict.txt so a
+// freshly loaded Dict also knows it.
+func TestAddAppendsAndPersists(t *testing.T) {
+	// The "sub" directory does not exist yet; Add must create it.
+	path := filepath.Join(t.TempDir(), "sub", "dict.txt")
+	first := loadT(t)
+	first.SetPersonalPath(path)
+
+	if first.Known("zzplonk") {
+		t.Fatal("precondition: zzplonk should be unknown")
+	}
+	if err := first.Add("zzplonk"); err != nil {
+		t.Fatalf("Add: %v", err)
+	}
+	if !first.Known("zzplonk") {
+		t.Error("word not Known immediately after Add")
+	}
+
+	// A second, independent Dict reading the same file must see the word.
+	second := loadT(t)
+	second.SetPersonalPath(path)
+	if err := second.LoadPersonal(); err != nil {
+		t.Fatal(err)
+	}
+	if !second.Known("zzplonk") {
+		t.Error("Add did not persist to dict.txt")
+	}
+}
internal/spell/suggest_test.go +47 −0
@@ -0,0 +1,47 @@
+package spell
+
+import "testing"
+
+// TestSuggestReturnsClosestWord checks that the correct spelling appears among
+// the suggestions for a common misspelling.
+func TestSuggestReturnsClosestWord(t *testing.T) {
+	dict := loadT(t)
+	suggestions := dict.Suggest("recieve", 5)
+	if len(suggestions) == 0 {
+		t.Fatal("Suggest(recieve) returned nothing")
+	}
+	if found := contains(suggestions, "receive"); !found {
+		t.Errorf("Suggest(recieve) = %v, want it to include \"receive\"", suggestions)
+	}
+}
+
+// TestSuggestRanksByDistanceThenFrequency checks the ranking order. "teh" is
+// one edit away from "the" and from several other short words, so the most
+// frequent of those close words must come first.
+func TestSuggestRanksByDistanceThenFrequency(t *testing.T) {
+	dict := loadT(t)
+	suggestions := dict.Suggest("teh", 5)
+	if len(suggestions) == 0 {
+		t.Fatal("Suggest(teh) returned nothing")
+	}
+	if top := suggestions[0]; top != "the" {
+		t.Errorf("Suggest(teh)[0] = %q, want \"the\" (closest + most frequent)", top)
+	}
+}
+
+// TestSuggestCapsResults checks that Suggest never returns more than max words
+// and that the correct spelling still makes the cut.
+func TestSuggestCapsResults(t *testing.T) {
+	dict := loadT(t)
+	suggestions := dict.Suggest("seperate", 3)
+	if n := len(suggestions); n > 3 {
+		t.Errorf("Suggest with max=3 returned %d results", n)
+	}
+	if found := contains(suggestions, "separate"); !found {
+		t.Errorf("Suggest(seperate) = %v, want it to include \"separate\"", suggestions)
+	}
+}
+
+// contains reports whether s is an element of xs.
+func contains(xs []string, s string) bool {
+	for i := 0; i < len(xs); i++ {
+		if xs[i] == s {
+			return true
+		}
+	}
+	return false
+}
internal/spell/words.txt.gz +0 −0
Binary file.