Add code to disambiguate pronouns dynamically.

This commit improves the display code so that the
smallest unambiguous abbreviation is calculated
based on the contents of pronouns.tab.
This commit is contained in:
Erik Osheim 2015-03-15 01:37:15 -04:00
parent 08cadfb6d3
commit 573294ba75
2 changed files with 34 additions and 7 deletions

View File

@ -90,16 +90,14 @@
(take 5 inputs) (take 5 inputs)
(u/table-lookup inputs pronouns-table)))) (u/table-lookup inputs pronouns-table))))
;; we could choose to display the entire row for the label. (defn make-link [pair]
;; currently the first two entries are enough to disambiguate the (let [link (str "/" (s/join "/" (second pair)))
;; pronouns -- will that always be true? label (str (s/join "/" (first pair)))]
(defn make-link [row]
(let [link (str "/" (s/join "/" row))
label (str (first row) "/" (first (rest row)))]
[:li [:a {:href link} label]])) [:li [:a {:href link} label]]))
(defn front [pronouns-table] (defn front [pronouns-table]
(let [links (map make-link (sort pronouns-table)) (let [abbreviations (u/abbreviate (sort pronouns-table))
links (map make-link abbreviations)
title "Pronoun Island"] title "Pronoun Island"]
(html (html
[:html [:html

View File

@ -14,3 +14,32 @@
(defn tabfile-lookup (defn tabfile-lookup
[query-key tabfile] [query-key tabfile]
(table-lookup query-key (slurp-tabfile tabfile))) (table-lookup query-key (slurp-tabfile tabfile)))
; given a row and its lexically-closest neighbors,
; determine the smallest abbreviation which is still
; distinct.
(defn disambiguate
[prev row next]
(loop [n 1]
(let [row-n (take n row)]
(cond
(>= n 5) row
(= row-n (take n prev)) (recur (+ n 1))
(= row-n (take n next)) (recur (+ n 1))
:else row-n))))
; given a list of pronoun rows, return a list of
; pairs, where the first item is the abbreviation
; and the second is the original pronoun row.
(defn abbreviate
[sorted-table]
(loop [acc nil
prev nil
row (first sorted-table)
todo (rest sorted-table)]
(let [next (first todo)
abbrev (disambiguate prev row next)
pair (list abbrev row)
acc2 (conj acc pair)]
(if (empty? todo) (reverse acc2)
(recur acc2 row next (rest todo))))))