# GNU MediaGoblin -- federated, autonomous media hosting
-# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
+# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
from mediagoblin import mg_globals
from mediagoblin.tools import url
+
# A super strict version of the lxml.html cleaner class
HTML_CLEANER = Cleaner(
scripts=True,
forms=True,
annoying_tags=True,
allow_tags=[
- 'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
- remove_unknown_tags=False, # can't be used with allow_tags
+ 'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br',
+ 'pre', 'code'],
+ remove_unknown_tags=False, # can't be used with allow_tags
safe_attrs_only=True,
- add_nofollow=True, # for now
+ add_nofollow=True, # for now
host_whitelist=(),
whitelist_tags=set([]))
+
def clean_html(html):
# clean_html barfs on an empty string
if not html:
return HTML_CLEANER.clean_html(html)
+
def convert_to_tag_list_of_dicts(tag_string):
"""
Filter input from incoming string containing user tags,
stripped_tag_string = u' '.join(tag_string.strip().split())
# Split the tag string into a list of tags
- for tag in stripped_tag_string.split(
- mg_globals.app_config['tags_delimiter']):
-
+ for tag in stripped_tag_string.split(','):
+ tag = tag.strip()
# Ignore empty or duplicate tags
- if tag.strip() and tag.strip() not in [t['name'] for t in taglist]:
-
- taglist.append({'name': tag.strip(),
- 'slug': url.slugify(tag.strip())})
+ if tag and tag not in [t['name'] for t in taglist]:
+ taglist.append({'name': tag,
+ 'slug': url.slugify(tag)})
return taglist
+
def media_tags_as_string(media_entry_tags):
"""
Generate a string from a media item's tags, stored as a list of dicts
This is the opposite of convert_to_tag_list_of_dicts
"""
- media_tag_string = ''
+ tags_string = ''
if media_entry_tags:
- media_tag_string = mg_globals.app_config['tags_delimiter'].join(
- [tag['name'] for tag in media_entry_tags])
- return media_tag_string
+ tags_string = u', '.join([tag['name'] for tag in media_entry_tags])
+ return tags_string
+
TOO_LONG_TAG_WARNING = \
u'Tags must be shorter than %s characters. Tags that are too long: %s'
+
def tag_length_validator(form, field):
"""
Make sure tags do not exceed the maximum tag length.
if too_long_tags:
raise wtforms.ValidationError(
- TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], \
+ TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'],
', '.join(too_long_tags)))
-MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
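+# Don't use Markdown's own safe mode, because lxml.html.clean is better and
+# we are using it anyway. The HTML produced by this instance is therefore NOT
+# sanitized: always pass it through clean_html() before displaying it.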
+UNSAFE_MARKDOWN_INSTANCE = markdown.Markdown()
+
def cleaned_markdown_conversion(text):
"""
if not text:
return u''
- return clean_html(MARKDOWN_INSTANCE.convert(text))
+ return clean_html(UNSAFE_MARKDOWN_INSTANCE.convert(text))