(ns frak
+ "Generate regulare expressions from collections of strings."
(:require [clojure.string :as s]))
-(comment
- ;; frak generates regular expressions from collections of strings.
-
- ;; Usage:
- (frak/pattern ["foo" "bar" "baz" "quux"])
- ;; => #"(?:ba(?:z|r)|foo|quux)"
- )
-
-;; Utilities
+;;;; Utilities
(defn- prefixes
"Return a list of all prefixes for a given collection."
[coll]
- (map-indexed
- (fn [i _]
- (take (inc i) coll))
- coll))
+ (map-indexed (fn [i _] (take (inc i) coll)) coll))
+
+(defn- update-meta [obj ks f & args]
+ (apply vary-meta obj update-in ks f args))
-;; Trie construction
+;;;; Trie construction
(defn- grow [trie [_ & cs :as chars] terminal?]
(letfn [(visit [inner-trie]
(let [it (or inner-trie {})
lc (last chars)
it (if terminal?
- (vary-meta it update-in [:terminals] conj lc)
+ (update-meta it [:terminals] conj lc)
it)]
(-> it
- (vary-meta update-in [:visitors] conj lc)
+ (update-meta [:visitors] conj lc)
(assoc lc (get-in trie chars)))))]
(if (seq cs)
(update-in trie (butlast chars) visit)
(recur (grow t cs false) (next ps))
(grow t (first ps) true))))))
-
(defn- build-trie [strs]
(reduce trie-put {} strs))
;;;; Pattern rendering
-;; Characters to escape when rendering a regular expression.
-(def ^{:private true}
- escape-chars
+(def ^{:private true
+ :doc "Characters to escape when rendering a regular expression."}
+ escape-char?
#{\\ \^ \$ \* \+ \? \. \| \( \) \{ \} \[ \]})
+(defn escape [c]
+ (if (escape-char? c) (str "\\" c) (str c)))
+
(def ^{:private true :dynamic true} *capture* false)
(defn- re-group-fmt []
- (let [start (if *capture* "(" "(?:")]
- (str start "%s)")))
+ (str (if *capture* "(" "(?:") "%s)"))
(defn- re-group [[s & more :as strs]]
(if (seq more)
terminal? (set ts)
ks (->> (keys trie)
(sort-by (frequencies vs))
- reverse )]
+ reverse)]
(re-group
(for [k ks]
- (let [sk (if (escape-chars k) (str "\\" k) k)]
+ (let [sk (escape k)
+ fmt (if (terminal? k)
+ (str "%s" (re-group-fmt) "?")
+ "%s%s")]
(if-let [branch (trie k)]
- (if (terminal? k)
- (->> (render-trie branch)
- (format (str "%s" (re-group-fmt) "?") sk))
- (->> (render-trie branch)
- (format "%s%s" sk)))
+ (format fmt sk (render-trie branch))
sk))))))
(defn pattern
([strs capture?]
{:pre [(every? string? strs)]}
(binding [*capture* capture?]
- (-> strs
- build-trie
- render-trie
- str
- re-pattern))))
+ (-> strs build-trie render-trie str re-pattern))))