Commit | Line | Data |
---|---|---|
03ae172a AW |
1 | # GNU MediaGoblin -- federated, autonomous media hosting |
2 | # Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. | |
3 | # | |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
17 | import wtforms | |
18 | import markdown | |
19 | from lxml.html.clean import Cleaner | |
20 | ||
21 | from mediagoblin import mg_globals | |
22 | from mediagoblin.tools import url | |
23 | ||
# A deliberately strict lxml.html Cleaner: the stripping options listed
# below are all switched on and only a short list of basic formatting
# tags is allowed through.
HTML_CLEANER = Cleaner(
    # Kinds of content to strip out
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    # The only tags that may remain in the output
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong',
        'p', 'ul', 'ol', 'li', 'a', 'br',
    ],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True,  # for now
    host_whitelist=(),
    whitelist_tags=set())
44 | ||
def clean_html(html):
    """
    Run ``html`` through HTML_CLEANER and return the cleaned markup.

    Falsy input (an empty string or None) is answered with an empty
    unicode string, because the cleaner barfs on an empty string.
    """
    if html:
        return HTML_CLEANER.clean_html(html)

    return u''
51 | ||
def convert_to_tag_list_of_dicts(tag_string):
    """
    Turn a delimiter-separated string of user tags into a list of tag dicts.

    Leading and trailing whitespace is stripped and every internal run of
    whitespace is collapsed to a single space before the string is split
    on the configured ``tags_delimiter``.  Empty tags and repeated tags
    are dropped; the first occurrence of each tag keeps its position.

    Returns a list of ``{'name': ..., 'slug': ...}`` dicts, or an empty
    list when ``tag_string`` is empty or None.
    """
    taglist = []
    if not tag_string:
        return taglist

    # Collapse internal whitespace runs; split() with no argument also
    # discards leading and trailing whitespace.
    normalized = u' '.join(tag_string.split())
    delimiter = mg_globals.app_config['tags_delimiter']

    # Names already emitted.  A set keeps the duplicate check O(1) per
    # tag instead of rebuilding a list of every name on each iteration.
    seen_names = set()

    for raw_tag in normalized.split(delimiter):
        name = raw_tag.strip()

        # Ignore empty or duplicate tags
        if not name or name in seen_names:
            continue

        seen_names.add(name)
        taglist.append({'name': name, 'slug': url.slugify(name)})

    return taglist
75 | ||
def media_tags_as_string(media_entry_tags):
    """
    Join the names of a media item's tags into a single string.

    ``media_entry_tags`` is a list of dicts that each carry a 'name' key;
    the names are joined with the configured ``tags_delimiter``.  An empty
    list or None gives an empty string.

    This is the opposite of convert_to_tag_list_of_dicts.
    """
    if not media_entry_tags:
        return ''

    delimiter = mg_globals.app_config['tags_delimiter']
    return delimiter.join(tag['name'] for tag in media_entry_tags)
87 | ||
# Validation message template; the two %s slots take the maximum tag
# length and the comma-separated list of offending tags.
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')
90 | ||
def tag_length_validator(form, field):
    """
    Form validator that rejects tags longer than the configured maximum.

    ``field.data`` is parsed with convert_to_tag_list_of_dicts.  When any
    tag name is longer than the ``tags_max_length`` setting, a
    wtforms.ValidationError built from TOO_LONG_TAG_WARNING is raised,
    naming the limit and the offending tags.
    """
    tags = convert_to_tag_list_of_dicts(field.data)
    if not tags:
        # No tags were supplied, so there is nothing to measure
        return

    max_length = mg_globals.app_config['tags_max_length']
    offending_names = [
        tag['name'] for tag in tags if len(tag['name']) > max_length]

    if not offending_names:
        return

    message = TOO_LONG_TAG_WARNING % (
        max_length, ', '.join(offending_names))
    raise wtforms.ValidationError(message)
104 | ||
105 | ||
# Module-level Markdown converter used by cleaned_markdown_conversion.
# NOTE(review): the safe_mode option was deprecated in Python-Markdown 2.5
# and removed in 3.0 -- confirm which Markdown version this project pins.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
107 | ||
def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through Markdown, and clean its HTML.

    Returns an empty unicode string for empty or None input; otherwise
    the converted markup after it has been passed through clean_html.
    """
    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string :)
    if not text:
        return u''

    # MARKDOWN_INSTANCE is shared by every call.  The Markdown docs ask
    # for reset() between convert() calls on a reused instance; without it
    # link reference definitions and stashed raw HTML from an earlier
    # conversion can carry over into this one and pile up over time.
    MARKDOWN_INSTANCE.reset()

    return clean_html(MARKDOWN_INSTANCE.convert(text))