1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 from lxml
.html
.clean
import Cleaner
21 from mediagoblin
import mg_globals
22 from mediagoblin
.tools
import url
# A super strict version of the lxml.html cleaner class
#
# NOTE(review): the reviewed text of this call was fragmented and several
# short keyword lines were not visible.  Options marked "restored" below were
# filled in with the strictest lxml setting to match the "super strict"
# intent -- confirm them against version control before merging.
HTML_CLEANER = Cleaner(
    scripts=True,  # restored
    javascript=True,  # restored
    comments=True,  # restored
    style=True,  # restored
    links=True,  # restored
    page_structure=True,  # restored
    processing_instructions=True,
    embedded=True,  # restored
    frames=True,  # restored
    forms=True,  # restored
    annoying_tags=True,  # restored
    # Only this small set of formatting tags is allowed.
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,  # restored
    add_nofollow=True,  # for now
    host_whitelist=(),  # restored
    # Empty set: no tags are whitelisted.
    whitelist_tags=set())
def clean_html(html):
    """
    Sanitize an HTML fragment with the module-level HTML_CLEANER.

    Returns an empty unicode string when html is empty or None; otherwise
    returns the result of HTML_CLEANER.clean_html(html).
    """
    # clean_html barfs on an empty string
    if not html:
        return u''

    return HTML_CLEANER.clean_html(html)
def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags,

    Collapses internal runs of whitespace, strips leading and trailing
    whitespace, and converts the "tags" text into a list of tags.

    The cleaned string is split on mg_globals.app_config['tags_delimiter'].
    Returns a list of {'name': ..., 'slug': ...} dicts, one per distinct
    non-empty tag in order of first appearance; slugs come from
    url.slugify().  An empty or None tag_string yields an empty list.
    """
    taglist = []
    if not tag_string:
        return taglist

    # Strip out internal, trailing, and leading whitespace
    stripped_tag_string = u' '.join(tag_string.split())

    # Names already emitted; a set keeps the duplicate check O(1) per tag
    seen_names = set()

    # Split the tag string into a list of tags
    delimiter = mg_globals.app_config['tags_delimiter']
    for raw_tag in stripped_tag_string.split(delimiter):
        name = raw_tag.strip()

        # Ignore empty or duplicate tags
        if not name or name in seen_names:
            continue

        seen_names.add(name)
        taglist.append({'name': name,
                        'slug': url.slugify(name)})

    return taglist
def media_tags_as_string(media_entry_tags):
    """
    Generate a string from a media item's tags, stored as a list of dicts

    This is the opposite of convert_to_tag_list_of_dicts

    Joins each tag's 'name' value with
    mg_globals.app_config['tags_delimiter'].  Returns an empty string when
    media_entry_tags is empty or None.
    """
    # Nothing to join; this also avoids iterating over None
    if not media_entry_tags:
        return ''

    delimiter = mg_globals.app_config['tags_delimiter']
    media_tag_string = delimiter.join(
        [tag['name'] for tag in media_entry_tags])
    return media_tag_string
# Message template for over-long tags.  Takes two %-arguments: the maximum
# tag length, then the comma-separated list of offending tag names.
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')
def tag_length_validator(form, field):
    """
    Make sure tags do not exceed the maximum tag length.

    Parses field.data with convert_to_tag_list_of_dicts() and checks every
    tag name against mg_globals.app_config['tags_max_length'].  `form` is
    not used here; presumably it is only present to match the validator
    call signature expected by wtforms.

    Raises wtforms.ValidationError listing every tag that is too long.
    """
    tags = convert_to_tag_list_of_dicts(field.data)

    # Look the limit up once instead of once per tag
    max_length = mg_globals.app_config['tags_max_length']
    too_long_tags = [
        tag['name'] for tag in tags
        if len(tag['name']) > max_length]

    if too_long_tags:
        raise wtforms.ValidationError(
            TOO_LONG_TAG_WARNING % (max_length, ', '.join(too_long_tags)))
# Module-level Markdown converter used by cleaned_markdown_conversion.
# NOTE(review): the `safe_mode` keyword was deprecated in Python-Markdown 2.5
# and removed in 3.0 -- confirm which Markdown release this project pins.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through MarkDown, and clean its HTML.

    Returns an empty unicode string when text is empty or None; otherwise
    returns clean_html() applied to MARKDOWN_INSTANCE.convert(text).
    """
    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string
    if not text:
        return u''

    return clean_html(MARKDOWN_INSTANCE.convert(text))