Commit | Line | Data |
---|---|---|
03ae172a | 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
cf29e8a8 | 2 | # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. |
03ae172a AW |
3 | # |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
17 | import wtforms | |
18 | import markdown | |
19 | from lxml.html.clean import Cleaner | |
20 | ||
21 | from mediagoblin import mg_globals | |
22 | from mediagoblin.tools import url | |
23 | ||
ee91c2b8 | 24 | |
03ae172a AW |
# Deliberately strict lxml.html Cleaner: every removal option is switched on
# and only a short allow-list of basic formatting tags is let through.
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div',
        'b',
        'i',
        'em',
        'strong',
        'p',
        'ul',
        'ol',
        'li',
        'a',
        'br',
    ],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True,  # for now
    host_whitelist=(),
    whitelist_tags=set())
45 | ||
# Character that separates individual tags in a user-supplied tag string
TAGS_DELIMITER = ','
ee91c2b8 | 47 | |
03ae172a AW |
def clean_html(html):
    """
    Run ``html`` through HTML_CLEANER and return the cleaned markup.

    Falsy input (empty string, None) short-circuits to an empty unicode
    string, because the cleaner barfs on an empty string.
    """
    return HTML_CLEANER.clean_html(html) if html else u''
54 | ||
ee91c2b8 | 55 | |
03ae172a AW |
def convert_to_tag_list_of_dicts(tag_string):
    """
    Turn a user-supplied, delimiter-separated tag string into a list of tags.

    Runs of whitespace are collapsed to a single space, the string is split
    on TAGS_DELIMITER, and every piece is stripped.  Empty pieces and
    repeated names are dropped; the first occurrence wins, so the input
    order is preserved.

    Returns a list of dicts, each with a 'name' key (the cleaned tag text)
    and a 'slug' key (url.slugify() of that name).  A falsy ``tag_string``
    (empty or None) yields an empty list.
    """
    taglist = []
    if not tag_string:
        return taglist

    # split() with no arguments already ignores leading/trailing whitespace,
    # so joining on a single space collapses every internal run as well
    collapsed = u' '.join(tag_string.split())

    # Names emitted so far; a set keeps the duplicate check O(1) per tag
    # instead of rebuilding a list of all names on every iteration
    seen_names = set()
    for piece in collapsed.split(TAGS_DELIMITER):
        name = piece.strip()

        # Ignore empty or duplicate tags
        if not name or name in seen_names:
            continue

        seen_names.add(name)
        taglist.append({'name': name, 'slug': url.slugify(name)})
    return taglist
79 | ||
ee91c2b8 | 80 | |
03ae172a AW |
def media_tags_as_string(media_entry_tags):
    """
    Join the 'name' of each tag dict into one editable string.

    ``media_entry_tags`` is a list of dicts as stored on a media item; the
    names are joined with TAGS_DELIMITER followed by a space.  A falsy
    value (empty list, None) gives an empty string.

    This is the inverse of convert_to_tag_list_of_dicts.
    """
    if not media_entry_tags:
        return ''

    separator = TAGS_DELIMITER + u' '
    return separator.join(tag['name'] for tag in media_entry_tags)
92 | ||
ee91c2b8 | 93 | |
03ae172a AW |
# Error message template: first %s is the maximum tag length, second %s is
# the comma-separated list of offending tags
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')
96 | ||
ee91c2b8 | 97 | |
03ae172a AW |
def tag_length_validator(form, field):
    """
    wtforms validator: reject tags longer than the configured maximum.

    ``field.data`` is parsed with convert_to_tag_list_of_dicts and each tag
    name is compared against mg_globals.app_config['tags_max_length'].
    Raises wtforms.ValidationError listing every offending tag; returns
    None when all tags fit.  ``form`` is unused here.
    """
    tags = convert_to_tag_list_of_dicts(field.data)
    if not tags:
        # Nothing to check; the config is only consulted when tags exist
        return

    max_length = mg_globals.app_config['tags_max_length']

    offenders = []
    for tag in tags:
        if len(tag['name']) > max_length:
            offenders.append(tag['name'])

    if offenders:
        raise wtforms.ValidationError(
            TOO_LONG_TAG_WARNING % (max_length, ', '.join(offenders)))
111 | ||
112 | ||
# Module-level Markdown converter shared by cleaned_markdown_conversion.
# NOTE(review): safe_mode='escape' presumably makes raw HTML in the input
# come out escaped rather than passed through -- confirm against the
# installed Markdown version.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
114 | ||
ee91c2b8 | 115 | |
03ae172a AW |
def cleaned_markdown_conversion(text):
    """
    Render ``text`` as Markdown and sanitize the resulting HTML.

    The text is converted with the shared MARKDOWN_INSTANCE and the output
    is passed through clean_html.  Falsy input (empty string, None) returns
    an empty unicode string without touching either.
    """
    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string :)
    if not text:
        return u''

    # MARKDOWN_INSTANCE is reused for every call; reset() discards state
    # left behind by the previous document (e.g. link reference
    # definitions) so one conversion cannot influence the next.
    MARKDOWN_INSTANCE.reset()
    return clean_html(MARKDOWN_INSTANCE.convert(text))