X-Git-Url: https://vcs.fsf.org/?a=blobdiff_plain;f=mediagoblin%2Ftools%2Ftext.py;h=96df49d27a929d894d074c06459c853df0354244;hb=dfd66b789cd6cc9470c2a98bcbda9ee5e0f3ad0f;hp=de4bb28190487e95c302732fa80c50f9cfa2555a;hpb=43199340a25ab0aa252796f1c865e9ebf0e96649;p=mediagoblin.git diff --git a/mediagoblin/tools/text.py b/mediagoblin/tools/text.py index de4bb281..96df49d2 100644 --- a/mediagoblin/tools/text.py +++ b/mediagoblin/tools/text.py @@ -1,5 +1,5 @@ # GNU MediaGoblin -- federated, autonomous media hosting -# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -21,6 +21,7 @@ from lxml.html.clean import Cleaner from mediagoblin import mg_globals from mediagoblin.tools import url + # A super strict version of the lxml.html cleaner class HTML_CLEANER = Cleaner( scripts=True, @@ -35,13 +36,15 @@ HTML_CLEANER = Cleaner( forms=True, annoying_tags=True, allow_tags=[ - 'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'], - remove_unknown_tags=False, # can't be used with allow_tags + 'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br', + 'pre', 'code'], + remove_unknown_tags=False, # can't be used with allow_tags safe_attrs_only=True, - add_nofollow=True, # for now + add_nofollow=True, # for now host_whitelist=(), whitelist_tags=set([])) + def clean_html(html): # clean_html barfs on an empty string if not html: @@ -49,6 +52,7 @@ def clean_html(html): return HTML_CLEANER.clean_html(html) + def convert_to_tag_list_of_dicts(tag_string): """ Filter input from incoming string containing user tags, @@ -63,31 +67,31 @@ def convert_to_tag_list_of_dicts(tag_string): stripped_tag_string = u' '.join(tag_string.strip().split()) # Split the tag string into a list of tags - for tag in stripped_tag_string.split( - mg_globals.app_config['tags_delimiter']): - + for tag in stripped_tag_string.split(','): + tag = tag.strip() # Ignore empty or duplicate tags - if tag.strip() and tag.strip() not in [t['name'] for t in taglist]: - - taglist.append({'name': tag.strip(), - 'slug': url.slugify(tag.strip())}) + if tag and tag not in [t['name'] for t in taglist]: + taglist.append({'name': tag, + 'slug': url.slugify(tag)}) return taglist + def media_tags_as_string(media_entry_tags): """ Generate a string from a media item's tags, stored as a list of dicts This is the opposite of convert_to_tag_list_of_dicts """ - media_tag_string = '' + tags_string = '' if media_entry_tags: - media_tag_string = mg_globals.app_config['tags_delimiter'].join( - [tag['name'] for tag in media_entry_tags]) - return media_tag_string + tags_string = u', '.join([tag['name'] for tag in media_entry_tags]) + return tags_string + TOO_LONG_TAG_WARNING = \ u'Tags must be shorter than %s characters. Tags that are too long: %s' + def tag_length_validator(form, field): """ Make sure tags do not exceed the maximum tag length. @@ -99,11 +103,14 @@ def tag_length_validator(form, field): if too_long_tags: raise wtforms.ValidationError( - TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], \ + TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], ', '.join(too_long_tags))) -MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape') +# Don't use the safe mode, because lxml.html.clean is better and we are using +# it anyway +UNSAFE_MARKDOWN_INSTANCE = markdown.Markdown() + def cleaned_markdown_conversion(text): """ @@ -114,4 +121,4 @@ def cleaned_markdown_conversion(text): if not text: return u'' - return clean_html(MARKDOWN_INSTANCE.convert(text)) + return clean_html(UNSAFE_MARKDOWN_INSTANCE.convert(text))