Arc Forumnew | comments | leaders | submitlogin
1 point by lojic 5145 days ago | link | parent

I found the following implementation in Clojure which compares much more favorably to Norvig's Python version than the other Scheme/Lisp versions I've seen. Still curious about how Arc might compare.


  (defn words [text] (re-seq #"[a-z]+" (. text (toLowerCase))))

  (defn train [features]
    (reduce (fn [model f] (assoc model f (inc (get model f 1)))) 
            {} features))

  (def *nwords* (train (words (slurp "big.txt"))))

  (defn edits1 [word]
    (let [alphabet "abcdefghijklmnopqrstuvwxyz", n (count word)]
      (distinct (concat
        (for [i (range n)] (str (subs word 0 i) (subs word (inc i))))
        (for [i (range (dec n))]
          (str (subs word 0 i) (nth word (inc i)) (nth word i) (subs word (+ 2 i))))
        (for [i (range n) c alphabet] (str (subs word 0 i) c (subs word (inc i))))
        (for [i (range (inc n)) c alphabet] (str (subs word 0 i) c (subs word i)))))))

  (defn known [words nwords] (for [w words :when (nwords w)]  w))

  (defn known-edits2 [word nwords] 
    (for [e1 (edits1 word) e2 (edits1 e1) :when (nwords e2)]  e2))

  (defn correct [word nwords]
    (let [candidates (or (known [word] nwords) (known (edits1 word) nwords) 
                         (known-edits2 word nwords) [word])]
      (apply max-key #(get nwords % 1) candidates)))