assert url.slugify(u'a w@lk in the park?') == u'a-w-lk-in-the-park'
assert url.slugify(u'a walk in the par\u0107') == u'a-walk-in-the-parc'
assert url.slugify(u'\u00E0\u0042\u00E7\u010F\u00EB\u0066') == u'abcdef'
+ # Russian
+ assert url.slugify(u'\u043f\u0440\u043e\u0433\u0443\u043b\u043a\u0430 '
+                    u'\u0432 \u043f\u0430\u0440\u043a\u0435') == u'progulka-v-parke'
+ # Korean
+ assert (url.slugify(u'\uacf5\uc6d0\uc5d0\uc11c \uc0b0\ucc45') ==
+         u'gongweoneseo-sancaeg')
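For reference, the expected slugs in the two new assertions follow directly from unidecode's transliteration of each whitespace/punctuation-separated word; the outputs below are inferred from the expected slugs above, assuming unidecode is installed:

    >>> from unidecode import unidecode
    >>> unidecode(u'\u043f\u0440\u043e\u0433\u0443\u043b\u043a\u0430')  # Russian 'прогулка'
    'progulka'
    >>> unidecode(u'\uacf5\uc6d0\uc5d0\uc11c')  # Korean '공원에서'
    'gongweoneseo'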
def test_locale_to_lower_upper():
"""
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
-# This import *is* used; see word.encode('tranlit/long') below.
-from unicodedata import normalize
-
-try:
- import translitcodec
- USING_TRANSLITCODEC = True
-except ImportError:
- USING_TRANSLITCODEC = False
-
+from unidecode import unidecode
_punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
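The removed fallback path matters here: normalize('NFKD', word).encode('ascii', 'ignore') silently drops any character without an ASCII decomposition, so Cyrillic or Hangul input collapsed to an empty slug whenever translitcodec was not installed, while unidecode transliterates it. A minimal, illustrative comparison (not part of the patch):

    from unicodedata import normalize
    from unidecode import unidecode

    text = u'\u043f\u0440\u043e\u0433\u0443\u043b\u043a\u0430'  # 'прогулка'

    # Old fallback: Cyrillic has no ASCII decomposition, so everything is dropped.
    normalize('NFKD', text).encode('ascii', 'ignore')  # -> '' (empty; b'' on Python 3)

    # New behaviour: transliterate rather than discard.
    unidecode(text)  # -> 'progulka'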
"""
result = []
for word in _punct_re.split(text.lower()):
-    if USING_TRANSLITCODEC:
-        word = word.encode('translit/long')
-    else:
-        word = normalize('NFKD', word).encode('ascii', 'ignore')
-
-    if word:
-        result.append(word)
+    result.extend(unidecode(word).split())
return unicode(delim.join(result))
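Pieced together from the hunks above, slugify now reads roughly as follows; the def slugify(text, delim=u'-') signature and the docstring are not shown in this diff, so they are assumed here:

    import re
    from unidecode import unidecode

    _punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')

    def slugify(text, delim=u'-'):
        """Generate an ASCII-only slug (signature and docstring assumed)."""
        result = []
        for word in _punct_re.split(text.lower()):
            # unidecode can return several space-separated chunks for a single
            # input word (e.g. for CJK text); split() keeps each chunk as its
            # own slug component and drops empty strings.
            result.extend(unidecode(word).split())
        return unicode(delim.join(result))  # Python 2 unicode(), as in the hunk above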
'pytz',
'six',
'oauthlib==0.5.0',
+ 'unidecode',
## Annoying. Please remove once we can! We only indirectly
## use pbr, and currently it breaks things, presumably till