From b486d91a767a5b3d9112a045cc51846ebff9fa0b Mon Sep 17 00:00:00 2001 From: Joe Littlejohn Date: Wed, 3 May 2017 09:50:16 +0100 Subject: [PATCH] Add whole-words? option for generated patterns This option causes patterns to require a word boundary on either end of the match. For instance a pattern built from: [ "foo" "bar" "baz" ] Will create a match against the following strings: "foo is a metasyntactic variable" "sometimes we use baz" but will not create a match against the following: "food and nutrition" "clubs and bars" --- src/cljx/frak.cljx | 19 +++++++++++++++---- test/frak_test.clj | 16 ++++++++++++++-- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/cljx/frak.cljx b/src/cljx/frak.cljx index 8566a23..f0d6222 100644 --- a/src/cljx/frak.cljx +++ b/src/cljx/frak.cljx @@ -61,6 +61,11 @@ *escape-chars* (:default metacharacters)) +(def ^{:private true + :dynamic true + :doc "Whether the rendered regex should match only whole words"} + *whole-words* false) + (defn- escape "Escape a character if it is an element of `*escape-chars*`." [c] @@ -91,7 +96,9 @@ (str (re-group-start *capture*) (string/join strs) - (re-group-end optional?))))) + (re-group-end optional?) + (when *whole-words* + "\\b"))))) (defn- re-or "Return a collection of strings joined with a regular expression or @@ -200,7 +207,8 @@ (def ^:private default-options {:capture? false :exact? false - :escape-chars (:default metacharacters)}) + :escape-chars (:default metacharacters) + :whole-words? false}) (defn string-pattern "Construct a regular expression as a string from a collection @@ -212,13 +220,16 @@ cs (or (get* opts :escape-chars) *escape-chars*) cs (if (coll? cs) cs (get* metacharacters cs)) pattern (binding [*capture* (get* opts :capture?) - *escape-chars* cs] + *escape-chars* cs + *whole-words* (get* opts :whole-words?)] (-> (build-trie strs) render-trie remove-unecessary-grouping))] (if (get* opts :exact?) (str "^" pattern "$") - pattern)))) + (if (get* opts :whole-words?) + (str "\\b" pattern) + pattern))))) #+cljs (def ^:export stringPattern string-pattern) diff --git a/test/frak_test.clj b/test/frak_test.clj index 23a0f41..5b43306 100644 --- a/test/frak_test.clj +++ b/test/frak_test.clj @@ -5,8 +5,8 @@ (def trie-put #'frak/trie-put) (def build-trie #'frak/build-trie) -(deftest trie-test - (is (= (build-trie ["a" "b"]) +(deftest trie-test + (is (= (build-trie ["a" "b"]) {:char nil :terminal? false :children #{{:char \a @@ -71,3 +71,15 @@ (are [words] (every? #(re-matches (pattern words) %) words) ["achy" "achylia" "achylous" "achymia" "achymous"] ["aching" "achingly"])) + +(deftest pattern-whole-words + (is (= ["k pop"] + (re-seq (pattern ["pop" "k pop"]) "uk pop"))) + + (is (= ["k pop"] + (re-seq (pattern ["pop" "k pop"] {:whole-words? false}) "uk pop"))) + + (is (= ["uk" "pop" "rock"] + (re-seq (pattern ["pop" "k pop" "rock" "uk"] {:whole-words? true}) "uk pop and rock"))) + + (is (empty? (re-seq (pattern ["pop" "k pop"] {:whole-words? true}) "uk pops")))) -- 2.25.1