Commit | Line | Data |
---|---|---|
03ae172a | 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
cf29e8a8 | 2 | # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. |
03ae172a AW |
3 | # |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
17 | import wtforms | |
18 | import markdown | |
19 | from lxml.html.clean import Cleaner | |
20 | ||
21 | from mediagoblin import mg_globals | |
22 | from mediagoblin.tools import url | |
23 | ||
ee91c2b8 | 24 | |
03ae172a AW |
# Deliberately strict lxml.html Cleaner configuration: the removal filters
# listed below are all switched on and only a short list of basic
# formatting tags is allowed through.
HTML_CLEANER = Cleaner(
    # Removal filters, all enabled
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    # The only tags permitted in the cleaned output
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br',
        'pre', 'code',
    ],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True,  # for now
    # Nothing is whitelisted: no hosts, no tags
    host_whitelist=(),
    whitelist_tags=set(),
)
46 | ||
ee91c2b8 | 47 | |
03ae172a AW |
def clean_html(html):
    """
    Run an HTML string through HTML_CLEANER and return the cleaned result.

    Falsy input (for example an empty string or None) is answered with an
    empty unicode string without invoking the cleaner, because the cleaner
    barfs on an empty string.
    """
    if html:
        return HTML_CLEANER.clean_html(html)

    # Nothing to clean
    return u''
54 | ||
ee91c2b8 | 55 | |
03ae172a AW |
def convert_to_tag_list_of_dicts(tag_string):
    """
    Turn a comma-separated string of user tags into a list of tag dicts.

    Leading and trailing whitespace is dropped and every internal run of
    whitespace is collapsed to a single space before the string is split
    on commas.  Empty tags and repeated tags are skipped; the first
    occurrence of a tag wins and the input order is kept.

    Returns a list of ``{'name': tag, 'slug': url.slugify(tag)}`` dicts.
    The list is empty when tag_string is falsy (empty string, None).
    """
    taglist = []
    if not tag_string:
        return taglist

    # Normalise whitespace: strip both ends, collapse internal runs
    normalized = u' '.join(tag_string.strip().split())

    # Names already emitted.  A set keeps the duplicate check cheap instead
    # of rebuilding a list of all names for every candidate tag.
    seen = set()
    for tag in normalized.split(','):
        tag = tag.strip()
        # Ignore empty or duplicate tags
        if not tag or tag in seen:
            continue
        seen.add(tag)
        taglist.append({'name': tag,
                        'slug': url.slugify(tag)})
    return taglist
77 | ||
ee91c2b8 | 78 | |
03ae172a AW |
def media_tags_as_string(media_entry_tags):
    """
    Join the names of a media item's tags into one comma-separated string.

    media_entry_tags is a list of dicts that each carry a 'name' key; this
    is the inverse of convert_to_tag_list_of_dicts.  A falsy argument
    (empty list, None) yields an empty string.
    """
    if not media_entry_tags:
        return ''

    names = [entry['name'] for entry in media_entry_tags]
    return u', '.join(names)
03ae172a | 89 | |
ee91c2b8 | 90 | |
03ae172a AW |
# Message template for over-long tags; takes two %s values: the maximum
# tag length and the comma-separated list of offending tags.
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')
93 | ||
ee91c2b8 | 94 | |
03ae172a AW |
def tag_length_validator(form, field):
    """
    Form validator that rejects tags longer than the configured maximum.

    field.data is parsed with convert_to_tag_list_of_dicts and every tag
    name is compared against mg_globals.app_config['tags_max_length'].
    The form argument is not used by this validator.

    Raises wtforms.ValidationError, listing the offending tags, when at
    least one tag name is too long.
    """
    offenders = []
    for entry in convert_to_tag_list_of_dicts(field.data):
        name = entry['name']
        if len(name) > mg_globals.app_config['tags_max_length']:
            offenders.append(name)

    if not offenders:
        return

    limit = mg_globals.app_config['tags_max_length']
    message = TOO_LONG_TAG_WARNING % (limit, ', '.join(offenders))
    raise wtforms.ValidationError(message)
108 | ||
109 | ||
c0428016 JK |
# Markdown's own safe mode is deliberately not used: the converted HTML is
# run through the lxml.html.clean based cleaner anyway, which does a better
# job.  This single module-level instance is created once at import time.
UNSAFE_MARKDOWN_INSTANCE = markdown.Markdown()
03ae172a | 113 | |
ee91c2b8 | 114 | |
03ae172a AW |
def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through Markdown, and clean its HTML.

    Returns the cleaned HTML produced by clean_html, or an empty unicode
    string when text is falsy (empty string, None).
    """
    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string :)
    if not text:
        return u''

    # The Markdown instance is shared by every call and keeps parser state
    # (such as link reference definitions) between convert() calls.  Reset
    # it first so one text's definitions cannot leak into the next.
    UNSAFE_MARKDOWN_INSTANCE.reset()
    return clean_html(UNSAFE_MARKDOWN_INSTANCE.convert(text))