Back sessions with It's Dangerous.

[mediagoblin.git] / mediagoblin / tools / url.py
diff --git a/mediagoblin/tools/url.py b/mediagoblin/tools/url.py

index 458ef2c86f6aa9c138a3ada10928b05efa6e7e2a..d9179f9e2eab18a1f168160df890402d29e99d34 100644 (file)
--- a/mediagoblin/tools/url.py
+++ b/mediagoblin/tools/url.py
@@ -1,5 +1,5 @@
  # GNU MediaGoblin -- federated, autonomous media hosting
-# Copyright (C) 2011 Free Software Foundation, Inc
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
  #
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU Affero General Public License as published by
@@ -15,9 +15,18 @@
  # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  
  import re
-import translitcodec
+# translitcodec (imported below) *is* used; see word.encode('translit/long').
+from unicodedata import normalize
+
+try:
+    import translitcodec
+    USING_TRANSLITCODEC = True
+except ImportError:
+    USING_TRANSLITCODEC = False
+
+
+_punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
  
-_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
  
  def slugify(text, delim=u'-'):
      """
@@ -25,7 +34,11 @@ def slugify(text, delim=u'-'):
      """
      result = []
      for word in _punct_re.split(text.lower()):
-        word = word.encode('translit/long')
+        if USING_TRANSLITCODEC:
+            word = word.encode('translit/long')
+        else:
+            word = normalize('NFKD', word).encode('ascii', 'ignore')
+
          if word:
              result.append(word)
      return unicode(delim.join(result))