From 14a44feb02b60e5530c7fa3f54bb087f7dd3a9de Mon Sep 17 00:00:00 2001
From: Joel Holdbrooks
Date: Tue, 6 Aug 2013 23:28:36 -0700
Subject: [PATCH] Use character sets at the end of groups when possible

---
 src/frak.clj       | 38 ++++++++++++++++++++++++++++----------
 test/frak_test.clj |  8 +++++++-
 2 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/src/frak.clj b/src/frak.clj
index 115d54d..44cac86 100644
--- a/src/frak.clj
+++ b/src/frak.clj
@@ -62,21 +62,39 @@
     (format (re-group-fmt) (s/join "|" strs))
     s))
 
+(defn- re-char-set
+  "Render chars as a regex character class, e.g. [abc]. Characters that
+  are special inside a class are escaped with a backslash so that they
+  only ever match themselves."
+  [chars]
+  (let [special? #{"\\" "[" "]" "^" "-" "&"}
+        render (fn [c]
+                 (let [s (str c)]
+                   (if (special? s) (str "\\" s) s)))]
+    (format "[%s]" (apply str (map render chars)))))
+
 (defn- render-trie [trie]
   (let [{vs :visitors ts :terminals} (meta trie)
         terminal? (set ts)
         ks (->> (keys trie)
                 (sort-by (frequencies vs))
-                reverse)]
-    (re-group
-     (for [k ks]
-       (let [sk (escape k)
-             fmt (if (terminal? k)
-                   (str "%s" (re-group-fmt) "?")
-                   "%s%s")]
-         (if-let [branch (trie k)]
-           (format fmt sk (render-trie branch))
-           sk))))))
+                reverse)
+        ;; Leaf keys (keys with no sub-trie). They are merged into a single
+        ;; character class, but only when there are at least two of them.
+        nks (if-let [cs (seq (filter #(nil? (trie %)) ks))]
+              (when (< 1 (count cs)) cs))
+        char-set (and (seq nks) (re-char-set nks))
+        ;; Every key not folded into the class is rendered on its own.
+        branches (for [k (remove (set nks) ks)]
+                   (let [sk (escape k)
+                         fmt (if (terminal? k)
+                               (str "%s" (re-group-fmt) "?")
+                               "%s%s")]
+                     (if-let [branch (trie k)]
+                       (format fmt sk (render-trie branch))
+                       sk)))]
+    ;; branches is a seq, so conj places the character class first.
+    (re-group (if char-set (conj branches char-set) branches))))
 
 (defn pattern
   "Construct a regular expression from a collection of strings."
diff --git a/test/frak_test.clj b/test/frak_test.clj
index c3bae69..8bb8fdf 100644
--- a/test/frak_test.clj
+++ b/test/frak_test.clj
@@ -58,4 +58,10 @@
   (let [pat1 (pattern ["foo" "foot"])
         pat2 (pattern ["foo" "" "foot"])]
     (is (= (str pat1)
-           (str pat2)))))
+           (str pat2))))
+
+  (is (= "ba[trz]"
+         (str (pattern ["bat" "bar" "baz"]))))
+
+  (is (= "b(?:i[pt]|at)"
+         (str (pattern ["bat" "bip" "bit"])))))
-- 
2.25.1