Commit | Line | Data |
---|---|---|
8e1e744d | 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
e5572c60 ML |
2 | # Copyright (C) 2011 Free Software Foundation, Inc |
3 | # | |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
4d4f6050 | 17 | from email.MIMEText import MIMEText |
b77eec65 CAW |
18 | import gettext |
19 | import pkg_resources | |
4d4f6050 | 20 | import smtplib |
cb8ea0fe | 21 | import sys |
0546833c | 22 | import re |
c5678c1a CAW |
23 | import urllib |
24 | from math import ceil | |
cdf538bd | 25 | from string import strip |
c5678c1a | 26 | import copy |
909371cd | 27 | import wtforms |
c5678c1a | 28 | |
f99f61c6 | 29 | from babel.localedata import exists |
31a8ff42 | 30 | import jinja2 |
0546833c | 31 | import translitcodec |
9150244a | 32 | from webob import Response, exc |
a68ee555 | 33 | from lxml.html.clean import Cleaner |
4bf8e888 | 34 | import markdown |
31a8ff42 | 35 | |
6e7ce8d1 | 36 | from mediagoblin import mg_globals |
22646703 | 37 | from mediagoblin import messages |
c5678c1a | 38 | from mediagoblin.db.util import ObjectId |
29f3fb70 | 39 | |
4d4f6050 CAW |
40 | TESTS_ENABLED = False |
41 | def _activate_testing(): | |
42 | """ | |
43 | Call this to activate testing in util.py | |
44 | """ | |
45 | global TESTS_ENABLED | |
46 | TESTS_ENABLED = True | |
47 | ||
48 | ||
66471f0e CAW |
def clear_test_buckets():
    """
    Reset every module-level container that accumulates state for tests.

    This gives the next round of tests a "clean slate": the cached jinja
    environments, both email test inboxes, and the stored template
    contexts are all replaced with fresh empty containers.
    """
    global SETUP_JINJA_ENVS, EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX

    # Jinja environments may have been redefined while testing.
    SETUP_JINJA_ENVS = {}

    # Forget any email captured so far.
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []

    clear_test_template_context()
67 | ||
68 | ||
f99f61c6 CAW |
# Cache of already-built jinja environments, keyed by locale.
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Set up (or fetch from cache) the Jinja environment for a locale.

    (In the future we may have another system for providing theming;
    for now this is good enough.)

    Args:
     - template_loader: loader handed to jinja2.Environment
     - locale: locale whose translations the environment should use

    Returns the jinja2.Environment for that locale.
    """
    # Always (re)install the translations for this locale in mg_globals,
    # even when the environment itself comes from the cache.
    setup_gettext(locale)

    # If we have a jinja environment set up with this locale, just
    # return that one.
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader, autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    template_env.install_gettext_callables(
        mg_globals.translations.gettext,
        mg_globals.translations.ngettext)

    # All templates will know how to ...
    # ... fetch all waiting messages and remove them from the queue
    template_env.globals['fetch_messages'] = messages.fetch_messages

    # Only cache environments for locales babel knows about, so arbitrary
    # locale strings cannot grow the cache.
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env
102 | ||
58dec5ef | 103 | |
e9279f21 CAW |
# While unit tests are running, the context used for each rendered
# template is kept here, keyed by template path.
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render the template at template_path with the given context.

    The request is always added to the context under the 'request' key,
    so callers don't have to do it. When tests are enabled the context is
    also remembered in TEMPLATE_TEST_CONTEXT for later inspection.

    Returns the rendered template.
    """
    tmpl = request.template_env.get_template(template_path)
    context['request'] = request
    output = tmpl.render(context)

    if not TESTS_ENABLED:
        return output

    TEMPLATE_TEST_CONTEXT[template_path] = context
    return output
124 | ||
125 | ||
def clear_test_template_context():
    """
    Throw away all template contexts recorded during tests.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()
129 | ||
130 | ||
1c63ad5d E |
def render_to_response(request, template, context):
    """
    Render a template and wrap the result in a Response.

    Much like Django's shortcut.render().
    """
    body = render_template(request, template, context)
    return Response(body)
134 | ||
135 | ||
9150244a E |
def redirect(request, *args, **kwargs):
    """
    Build an HTTPFound() redirect.

    Takes a request; every other argument is passed straight on to
    request.urlgen() to compute the redirect location.
    """
    target = request.urlgen(*args, **kwargs)
    return exc.HTTPFound(location=target)
139 | ||
140 | ||
58dec5ef CAW |
def setup_user_in_request(request):
    """
    Examine a request and tack on a request.user attribute.

    request.user is set to None when the session carries no 'user_id'.
    Otherwise it is set to whatever the user lookup for that id returns;
    if the lookup finds nothing, the session is invalidated as well.
    """
    if 'user_id' not in request.session:
        request.user = None
        return

    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user
cb8ea0fe CAW |
160 | |
161 | ||
def import_component(import_string):
    """
    Import and return a single component (probably a method, class, or
    global variable) out of a module.

    Args:
     - import_string: what to import, written in the format
       "module1.module2:component"
    """
    module_path, attribute = import_string.split(':', 1)
    # __import__ returns the top-level package for dotted names, so fetch
    # the actual module from sys.modules instead.
    __import__(module_path)
    return getattr(sys.modules[module_path], attribute)
4d4f6050 | 176 | |
0546833c AW |
# Runs of whitespace/punctuation that separate the words of a slug.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/

    The lowercased text is split on punctuation, each piece is encoded
    with the 'translit/long' codec, and the non-empty results are joined
    with delim.
    """
    pieces = (
        chunk.encode('translit/long')
        for chunk in _punct_re.split(text.lower()))
    return unicode(delim.join([piece for piece in pieces if piece]))
4d4f6050 CAW |
189 | |
190 | ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
191 | ### Special email test stuff begins HERE | |
192 | ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
193 | ||
194 | # We have two "test inboxes" here: | |
195 | # | |
196 | # EMAIL_TEST_INBOX: | |
197 | # ---------------- | |
198 | # If you're writing test views, you'll probably want to check this. | |
199 | # It contains a list of MIMEText messages. | |
200 | # | |
201 | # EMAIL_TEST_MBOX_INBOX: | |
202 | # ---------------------- | |
203 | # This collects the messages from the FakeMhost inbox. It's really | |
204 | # just here for testing the send_email method itself. | |
205 | # | |
206 | # Anyway this contains: | |
207 | # - from | |
208 | # - to: a list of email recipient addresses | |
209 | # - message: not just the body, but the whole message, including | |
210 | # headers, etc. | |
211 | # | |
212 | # ***IMPORTANT!*** | |
213 | # ---------------- | |
214 | # Before running tests that call functions which send email, you should | |
215 | # always call _clear_test_inboxes() to "wipe" the inboxes clean. | |
216 | ||
EMAIL_TEST_INBOX = []
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Stand-in mail host: records what send_email hands over instead of
    delivering it, so messages can be captured and tested.
    """
    def connect(self):
        """Nothing to connect to."""
        return None

    def sendmail(self, from_addr, to_addrs, message):
        """Append the would-be delivery to EMAIL_TEST_MBOX_INBOX."""
        delivery = {
            'from': from_addr,
            'to': to_addrs,
            'message': message}
        EMAIL_TEST_MBOX_INBOX.append(delivery)
234 | ||
235 | def _clear_test_inboxes(): | |
236 | global EMAIL_TEST_INBOX | |
237 | global EMAIL_TEST_MBOX_INBOX | |
238 | EMAIL_TEST_INBOX = [] | |
239 | EMAIL_TEST_MBOX_INBOX = [] | |
240 | ||
241 | ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
242 | ### </Special email test stuff> | |
243 | ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
244 | ||
def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    When tests are enabled or email debug mode is on, the message goes to
    a FakeMhost instead of a real SMTP connection. In debug mode the
    message is additionally printed.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns whatever the mail host's sendmail() returns.
    """
    # Read the flag once, tolerating it not being set up at all, so every
    # check below agrees on its value.
    debug_mode = getattr(mg_globals, 'email_debug_mode', False)
    use_fake_host = TESTS_ENABLED or debug_mode

    # Build the message before opening any connection, so a failure here
    # cannot leave a connection dangling.
    message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
    message['Subject'] = subject
    message['From'] = from_addr
    message['To'] = ', '.join(to_addrs)

    if TESTS_ENABLED:
        EMAIL_TEST_INBOX.append(message)

    if debug_mode:
        print(u"===== Email =====")
        print(u"From address: %s" % message['From'])
        print(u"To addresses: %s" % message['To'])
        print(u"Subject: %s" % message['Subject'])
        print(u"-- Body: --")
        print(message.get_payload(decode=True))

    if use_fake_host:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP()

    mhost.connect()
    try:
        return mhost.sendmail(from_addr, to_addrs, message.as_string())
    finally:
        if not use_fake_host:
            # Don't leave the SMTP connection open.  If a polite QUIT is
            # no longer possible, just drop the socket; either way any
            # error from sendmail() itself still propagates.
            try:
                mhost.quit()
            except (smtplib.SMTPException, IOError):
                mhost.close()
20c834ff | 281 | |
8b28bee4 CAW |
282 | |
283 | ################### | |
284 | # Translation tools | |
285 | ################### | |
286 | ||
287 | ||
b77eec65 CAW |
# Location of the 'translations' resource inside the mediagoblin package;
# used as the locale directory when loading gettext catalogs.
TRANSLATIONS_PATH = pkg_resources.resource_filename('mediagoblin',
                                                    'translations')
290 | ||
291 | ||
8b28bee4 CAW |
def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The language part is lowercased, the country part uppercased, and the
    two are joined with an underscore. A locale with no separator is
    simply lowercased ("EN" -> "en").
    """
    # '-' is tried first, so "xx-yy_zz" splits at the dash.
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()
304 | ||
305 | ||
def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    Everything is lowercased and the first underscore, if any, becomes a
    dash ("en_US" -> "en-us"). Locales already written with a dash are
    only lowercased.
    """
    return locale.lower().replace('_', '-', 1)
315 | ||
316 | ||
def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request

    Sources are consulted in this order; the first one present wins:
     1. a 'lang' field in the GET (or, if GET is empty, POST) data
     2. a 'locale' entry in the routing matchdict
     3. a 'target_lang' entry in the session
     4. the first match from the request's accepted languages
     5. English

    The result is normalized with locale_to_lower_upper().
    """
    request_form = request.GET or request.POST

    if 'lang' in request_form:
        return locale_to_lower_upper(request_form['lang'])

    accept_lang_matches = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    if 'locale' in request.matchdict:
        target_lang = request.matchdict['locale']
    elif 'target_lang' in request.session:
        target_lang = request.session['target_lang']
    # Pull the first acceptable language
    elif accept_lang_matches:
        target_lang = accept_lang_matches[0]
    # Fall back to English
    else:
        target_lang = 'en'

    return locale_to_lower_upper(target_lang)
b77eec65 CAW |
342 | |
343 | ||
a68ee555 CAW |
# A super strict version of the lxml.html cleaner class: every removal
# option is switched on and only a short list of tags is allowed through.
HTML_CLEANER = Cleaner(
    scripts=True, javascript=True, comments=True, style=True,
    links=True, page_structure=True, processing_instructions=True,
    embedded=True, frames=True, forms=True, annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True,  # for now
    host_whitelist=(),
    whitelist_tags=set())
364 | ||
365 | ||
def clean_html(html):
    """
    Run html through HTML_CLEANER and return the cleaned result.

    Empty input yields an empty unicode string without invoking the
    cleaner, because the cleaner barfs on an empty string.
    """
    return HTML_CLEANER.clean_html(html) if html else u''
372 | ||
373 | ||
def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags,

    Strips trailing, leading, and internal whitespace, and also converts
    the "tags" text into a list of dicts, one per tag, each holding the
    tag's 'name' and its 'slug'.

    Tags are split on the configured 'tags_delimiter'. Unless
    'tags_case_sensitive' is set, names are lowercased. Empty tags and
    repeats of an earlier tag are dropped.
    """
    taglist = []
    if not tag_string:
        return taglist

    # Strip out internal, trailing, and leading whitespace
    stripped_tag_string = u' '.join(tag_string.strip().split())

    case_sensitive = mg_globals.app_config['tags_case_sensitive']
    seen_names = set()

    # Split the tag string into a list of tags
    for raw_tag in stripped_tag_string.split(
            mg_globals.app_config['tags_delimiter']):
        name = raw_tag.strip()
        if not case_sensitive:
            name = name.lower()

        # Ignore empty or duplicate tags.  The check uses the normalized
        # name, so "foo, Foo" counts as a duplicate when tags are not
        # case sensitive.
        if not name or name in seen_names:
            continue

        seen_names.add(name)
        taglist.append({'name': name, 'slug': slugify(name)})

    return taglist
cdf538bd CFD |
401 | |
402 | ||
0712a06d CFD |
def media_tags_as_string(media_entry_tags):
    """
    Build one string out of a media item's tags (a list of dicts) by
    joining their names with the configured tags delimiter.

    This is the opposite of convert_to_tag_list_of_dicts
    """
    if not media_entry_tags:
        return ''

    delimiter = mg_globals.app_config['tags_delimiter']
    return delimiter.join([entry['name'] for entry in media_entry_tags])
414 | ||
909371cd CFD |
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')


def tag_length_validator(form, field):
    """
    Form validator: reject the field if any of its tags is longer than
    the configured maximum tag length.

    Raises wtforms.ValidationError listing the offending tags.
    """
    offenders = []
    for tag in convert_to_tag_list_of_dicts(field.data):
        name = tag['name']
        if len(name) > mg_globals.app_config['tags_max_length']:
            offenders.append(name)

    if not offenders:
        return

    raise wtforms.ValidationError(
        TOO_LONG_TAG_WARNING % (
            mg_globals.app_config['tags_max_length'],
            ', '.join(offenders)))
909371cd CFD |
431 | |
432 | ||
# Shared converter instance, created with safe_mode='escape'.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')


def cleaned_markdown_conversion(text):
    """
    Convert a block of text with MarkDown, then clean the resulting HTML.
    """
    if text:
        converted = MARKDOWN_INSTANCE.convert(text)
        return clean_html(converted)

    # Neither Markdown nor clean_html has anything to do with an empty
    # string :)
    return u''
445 | ||
446 | ||
f99f61c6 CAW |
# Cache of already-loaded translation objects, keyed by locale.
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    The translations for the locale are loaded (or taken from the cache)
    and installed via mg_globals.setup_globals(translations=...).
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        # Only cache locales babel knows about, so arbitrary locale
        # strings cannot grow the cache.
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)
ae85ed0f BK |
469 | |
470 | ||
b9e9610b CAW |
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - cursor: db cursor; must offer count(), skip() and limit()
         - per_page: number of objects per page
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        # Counted once, up front; every page computation relies on it.
        self.total_count = self.cursor.count()

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages (the last one may be partially filled)."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True if there is a page before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True if there is a page after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pager.

        Numbers at the left edge, around the current page, and at the
        right edge are yielded; each skipped run in between is
        represented by a single None.
        """
        # self.pages is a computed property; work it out once instead of
        # on every iteration.
        total_pages = self.pages
        last = 0
        for num in xrange(1, total_pages + 1):
            at_left_edge = num <= left_edge
            around_current = (
                self.page - left_current - 1 < num
                < self.page + right_current)
            at_right_edge = num > total_pages - right_edge

            if at_left_edge or around_current or at_right_edge:
                if last + 1 != num:
                    # Some numbers were skipped since the last yield.
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        get_params is copied, never modified; it may be None or empty.
        """
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)