From fef2a3db211af4b565917fefd751a3d269a92018 Mon Sep 17 00:00:00 2001 From: Joel Holdbrooks Date: Mon, 29 Jul 2013 10:40:27 -0700 Subject: [PATCH] Initial commit --- .gitignore | 12 ++++++ README.md | 13 ++++++ project.clj | 6 +++ src/frak.clj | 101 +++++++++++++++++++++++++++++++++++++++++++++ test/frak_test.clj | 56 +++++++++++++++++++++++++ 5 files changed, 188 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 project.clj create mode 100644 src/frak.clj create mode 100644 test/frak_test.clj diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cc2ff39 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +/target +/lib +/classes +/checkouts +pom.xml +pom.xml.asc +*.jar +*.class +.lein-deps-sum +.lein-failures +.lein-plugins +.lein-repl-history diff --git a/README.md b/README.md new file mode 100644 index 0000000..6fa64ba --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +# frak + +frak transforms collections of strings into regular expressions for +matching those strings. + +## Usage + +```clojure +user> (require 'frak) +nil +user> (frak/pattern ["foo" "bar" "baz" "quux"]) +#"(?:ba(?:r|z)|foo|quux)" +``` diff --git a/project.clj b/project.clj new file mode 100644 index 0000000..66d7587 --- /dev/null +++ b/project.clj @@ -0,0 +1,6 @@ +(defproject frak "0.1.0-SNAPSHOT" + :description "Transform collections of strings in to regular expressions." + :url "http://github.com/noprompt/frak" + :license {:name "Eclipse Public License" + :url "http://www.eclipse.org/legal/epl-v10.html"} + :dependencies [[org.clojure/clojure "1.4.0"]]) diff --git a/src/frak.clj b/src/frak.clj new file mode 100644 index 0000000..035a09c --- /dev/null +++ b/src/frak.clj @@ -0,0 +1,101 @@ +(ns frak + (:require [clojure.string :as s])) + +(comment + ;; frak generates regular expressions from collections of strings. + + ;; Usage: + (frak/pattern ["foo" "bar" "baz" "quux"]) + ;; => #"(?:ba(?:z|r)|foo|quux)" + ) + +;; Utilities + +(defn- prefixes + "Return a list of all prefixes for a given collection." + [coll] + (map-indexed + (fn [i _] + (take (inc i) coll)) + coll)) + +;; Trie construction + +(defn- grow [trie [_ & cs :as chars] terminal?] + (letfn [(visit [inner-trie] + (let [it (or inner-trie {}) + lc (last chars) + it (if terminal? + (vary-meta it update-in [:terminals] conj lc) + it)] + (-> it + (vary-meta update-in [:visitors] conj lc) + (assoc lc (get-in trie chars)))))] + (if (seq cs) + (update-in trie (butlast chars) visit) + (visit trie)))) + +(defn- trie-put + ([s] + {:pre [(string? s)]} + (trie-put {} s)) + ([trie s] + {:pre [(map? trie) (string? s)]} + (if-not (seq s) + {} + (loop [t trie, ps (prefixes s)] + (if-let [cs (and (next ps) (first ps))] + (recur (grow t cs false) (next ps)) + (grow t (first ps) true)))))) + + +(defn- build-trie [strs] + (reduce trie-put {} strs)) + +;;;; Pattern rendering + +;; Characters to escape when rendering a regular expression. +(def ^{:private true} + escape-chars + #{\\ \^ \$ \* \+ \? \. \| \( \) \{ \} \[ \]}) + +(def ^{:private true :dynamic true} *capture* false) + +(defn- re-group-fmt [] + (let [start (if *capture* "(" "(?:")] + (str start "%s)"))) + +(defn- re-group [[s & more :as strs]] + (if (seq more) + (format (re-group-fmt) (s/join "|" strs)) + s)) + +(defn- render-trie [trie] + (let [{vs :visitors ts :terminals} (meta trie) + terminal? (set ts) + ks (->> (keys trie) + (sort-by (frequencies vs)) + reverse )] + (re-group + (for [k ks] + (let [sk (if (escape-chars k) (str "\\" k) k)] + (if-let [branch (trie k)] + (if (terminal? k) + (->> (render-trie branch) + (format (str "%s" (re-group-fmt) "?") sk)) + (->> (render-trie branch) + (format "%s%s" sk))) + sk)))))) + +(defn pattern + "Construct a regular expression from a collection of strings." + ([strs] + (pattern strs false)) + ([strs capture?] + {:pre [(every? string? strs)]} + (binding [*capture* capture?] + (-> strs + build-trie + render-trie + str + re-pattern)))) diff --git a/test/frak_test.clj b/test/frak_test.clj new file mode 100644 index 0000000..4f7b365 --- /dev/null +++ b/test/frak_test.clj @@ -0,0 +1,56 @@ +(ns frak-test + (:use clojure.test + frak)) + +(def trie-put #'frak/trie-put) +(def build-trie #'frak/build-trie) + +(deftest trie-test + (is (= (-> (trie-put "a") + (trie-put "b")) + ^{:terminals '(\b \a) + :visitors '(\b \a)} + {\a nil + \b nil})) + + (is (= (-> (trie-put "aaa") + (trie-put "ab")) + ^{:visitors '(\a\a)} + {\a + ^{:terminals '(\b) + :visitors '(\a \b)} + {\a + ^{:terminals '(\a) + :visitors '(\a)} + {\a nil} + \b nil}})) + + (is (= (-> (trie-put "ab") + (trie-put "aaa")) + ^{:visitors '(\a\a)} + {\a + ^{:terminals '(\b) + :visitors '(\a \b)} + {\a + ^{:terminals '(\a) + :visitors '(\a)} + {\a nil} + \b nil}}))) + +(deftest pattern-test + (let [strs1 ["foo" "bar" "baz"] + strs2 ["baz" "bar" "foo"] + pat1 (pattern strs1) + pat2 (pattern strs2)] + (is (every? #(re-matches pat1 %) strs1)) + + (is (every? #(re-matches pat2 %) strs1)) + + (is (every? #(re-matches pat1 %) strs2)) + + (is (every? #(re-matches pat2 %) strs2)) + + (is (not (or (re-matches pat1 "f") + (re-matches pat1 "b") + (re-matches pat1 "ba") + (re-matches pat1 "fo")))))) -- 2.25.1