Back sessions with It's Dangerous.

[mediagoblin.git] / mediagoblin / tools / url.py
diff --git a/mediagoblin/tools/url.py b/mediagoblin/tools/url.py

index e2caed393386af2320f92d3cb70f5c908f26e813..d9179f9e2eab18a1f168160df890402d29e99d34 100644 (file)
--- a/mediagoblin/tools/url.py
+++ b/mediagoblin/tools/url.py
@@ -1,5 +1,5 @@
  # GNU MediaGoblin -- federated, autonomous media hosting
-# Copyright (C) 2011 Free Software Foundation, Inc
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
  #
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU Affero General Public License as published by
@@ -15,9 +15,17 @@
  # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  
  import re
+# This import *is* used; see word.encode('translit/long') below.
+from unicodedata import normalize
  
+try:
+    import translitcodec
+    USING_TRANSLITCODEC = True
+except ImportError:
+    USING_TRANSLITCODEC = False
  
-_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
+
+_punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
  
  
  def slugify(text, delim=u'-'):
@@ -26,7 +34,11 @@ def slugify(text, delim=u'-'):
      """
      result = []
      for word in _punct_re.split(text.lower()):
-        word = word.encode('translit/long')
+        if USING_TRANSLITCODEC:
+            word = word.encode('translit/long')
+        else:
+            word = normalize('NFKD', word).encode('ascii', 'ignore')
+
          if word:
              result.append(word)
      return unicode(delim.join(result))