From f68c3d5558ce5c7ddfe6b9e0efd385b0d2f72b57 Mon Sep 17 00:00:00 2001 From: Joel Holdbrooks Date: Thu, 22 Aug 2013 00:39:02 -0700 Subject: [PATCH] Refactor and finish rendering process, add new tests `re-group` no longer joins strings as pipe (or) delimited values. `re-or-group` is a new function which essentially does what `re-group` used to do. This cleans up some edge cases where it's favorable to use a regular group. Post filtering has been added to the expression rendering process to remove unecessary grouping wherever possible. This is done using `clojure.string/replace` and it's a hackish, but works. --- src/cljx/frak.cljx | 72 ++++++++++++++++++++++++++++++++++------------ test/frak_test.clj | 6 +++- 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/src/cljx/frak.cljx b/src/cljx/frak.cljx index f6410bd..61988f0 100644 --- a/src/cljx/frak.cljx +++ b/src/cljx/frak.cljx @@ -83,9 +83,24 @@ (when (seq strs) (str (re-group-start *capture*) - (string/join "|" strs) + (string/join strs) (re-group-end optional?))))) +(defn- re-or + "Return a collection of strings joined with a regular expression or + (`|`) character. Intended to be used with `re-or-group`." + [strs] + (string/join "|" strs)) + +(defn- re-or-group + "Convert a collection of strings into a regular expression group. When + `optional?` is truthy a the group will end with \")?\"." + ([strs] + (re-group strs false)) + ([strs optional?] + (when (seq strs) + (re-group (re-or strs) optional?)))) + (defn- re-char-set "Convert a collection of characters in to a regular expression character set. When `optional?` is truthy a \"?\" is appended." @@ -117,7 +132,7 @@ (escape char)) (defmethod render-trie ::single-child-terminal - [{:keys [char children]}] + [{:keys [char children terminal?]}] (let [child (first children)] (str (escape char) @@ -125,41 +140,60 @@ (not (seq (:children child)))) (render-trie (update-in child [:char] #(str % "?")) ) - (str (re-group-start *capture*) - (render-trie child) - (re-group-end true)))))) + (re-group (render-trie child) terminal?))))) (defmethod render-trie ::single-child-non-terminal [{:keys [char children]}] - (str (escape char) (render-trie (first children)))) + (->> (first children) + (render-trie) + (str (escape char)))) (defmethod render-trie :default [{:keys [char children terminal?]}] (let [groups (-> (juxt :terminal? :children) (group-by children) (dissoc nil)) - grouped + subpatterns (mapv (fn [[_ v]] - (str (re-char-set (map :char v)) - (-> (first v) - (dissoc :char) - (render-trie) - ;; Replace instances of "(?:[abc])" with "[abc]". - ;; This is such an ugly hack. - (string/replace #"\(\?:?(\[[^\]]+\])\)" "$1")))) + (let [chars (map :char v) + char-set (re-char-set chars) + subpattern (-> (first v) + (dissoc :char) + (render-trie))] + (if (< 1 (count chars)) + (re-group [char-set subpattern]) + (str char-set subpattern)))) groups)] (str (escape char) - (if (= (first grouped) (peek grouped)) - (str (peek grouped) (when terminal? "?")) - (re-group grouped terminal?))))) - + (if (= (first subpatterns) (peek subpatterns)) + (str (peek subpatterns) (when terminal? "?")) + (re-or-group subpatterns terminal?))))) + +;; This is, admittedly, a dirty hack and bat shit crazy but, for now, +;; it gets the job done. Until a more sophisticated data structure or +;; rendering process built this will do for now. +(defn- remove-unecessary-grouping + [pattern] + (-> pattern + ;; Replaces "(?:[xy]z)" and "(?:[xy]z)?" with "[xy]z" and + ;; "[xy]z?" respectively. + (string/replace #"\(\?:?(\[[^\]]+\])\)(\?)?" + "$1$2") + ;; Replaces "(?:[ab]cd)" with "[ab]cd". + (string/replace #"\(\?:?(\[[^\]]+\])([^\|\)]+[^\?]?)\)([^\?])" + "$1$2$3"))) + +;; TODO: This needs to be `:export`ed and work with regular +;; JavaScript. (defn string-pattern "Construct a regular expression as a string from a collection of strings." [strs opts] (let [pattern (binding [*capture* (:capture? opts)] - (-> strs build-trie render-trie))] + (-> (build-trie strs) + render-trie + remove-unecessary-grouping))] (if (:exact? opts) (str "^" pattern "$") pattern))) diff --git a/test/frak_test.clj b/test/frak_test.clj index 3af8cb6..3edacbd 100644 --- a/test/frak_test.clj +++ b/test/frak_test.clj @@ -57,4 +57,8 @@ (string-pattern ["bat" "bar" "baz"] nil)))) (is (= "b(?:i[pt]|at)" - (string-pattern ["bat" "bip" "bit"] nil)))) + (string-pattern ["bat" "bip" "bit"] nil))) + + (are [words] (every? #(re-matches (pattern words) %) words) + ["achy" "achylia" "achylous" "achymia" "achymous"] + ["aching" "achingly"])) -- 2.25.1