508. Updates copyright/license information
[mediagoblin.git] / mediagoblin / util.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 from __future__ import division
18
19 from email.MIMEText import MIMEText
20 import gettext
21 import pkg_resources
22 import smtplib
23 import sys
24 import re
25 import urllib
26 from math import ceil, floor
27 import copy
28 import wtforms
29
30 from babel.localedata import exists
31 from babel.support import LazyProxy
32 import jinja2
33 import translitcodec
34 from webob import Response, exc
35 from lxml.html.clean import Cleaner
36 import markdown
37 from wtforms.form import Form
38
39 from mediagoblin import mg_globals
40 from mediagoblin import messages
41 from mediagoblin.db.util import ObjectId
42
43 from itertools import izip, count
44
# Image variant names; presumably the order in which a displayable image
# is looked up -- confirm against the callers.
DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']

# Module-wide switch read by render_template() and send_email()
TESTS_ENABLED = False


def _activate_testing():
    """
    Turn on the module-level TESTS_ENABLED flag for util.py
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True
54
55
def clear_test_buckets():
    """
    Reset everything this module accumulates for the benefit of tests.

    That is: the cached jinja environments, both email test inboxes
    and the stored template contexts.  Call it whenever the next round
    of tests needs a "clean slate".
    """
    global SETUP_JINJA_ENVS
    SETUP_JINJA_ENVS = {}

    # Rebinds EMAIL_TEST_INBOX and EMAIL_TEST_MBOX_INBOX to fresh lists
    _clear_test_inboxes()

    clear_test_template_context()
74
75
# Cache of already configured jinja2 environments, keyed by locale
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Return a Jinja environment configured for this locale.

    The gettext setup for the locale is (re)applied on every call.  An
    environment that was cached for the locale is returned as-is;
    otherwise a new one is built, and cached if babel knows the locale.

    (In the future we may have another system for providing theming;
    for now this is good enough.)
    """
    setup_gettext(locale)

    # Reuse the environment we built earlier for this locale, if any
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader,
        autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    translations = mg_globals.translations
    template_env.install_gettext_callables(
        translations.ugettext,
        translations.ungettext)

    # Helpers every template gets:
    #  - fetch_messages: fetch waiting messages and remove them from the queue
    #  - gridify_list / gridify_cursor: lay out thumbnails or other media
    #    in a grid
    template_env.globals.update({
        'fetch_messages': messages.fetch_messages,
        'gridify_list': gridify_list,
        'gridify_cursor': gridify_cursor})

    # Only environments for locales babel knows about are kept around
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env
112
113
# While unit tests are enabled, the context of each rendered template
# is stored here under its template path
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render the template at template_path with the given context.

    The request is added to the context under the 'request' key (the
    dict passed in is modified), so callers don't have to do that.
    When TESTS_ENABLED is set the context is also remembered in
    TEMPLATE_TEST_CONTEXT.
    """
    template = request.template_env.get_template(template_path)

    context['request'] = request
    output = template.render(context)

    if TESTS_ENABLED:
        TEMPLATE_TEST_CONTEXT[template_path] = context

    return output
134
135
def clear_test_template_context():
    """
    Replace TEMPLATE_TEST_CONTEXT with a new, empty dict.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()
139
140
def render_to_response(request, template, context, status=200):
    """
    Render a template and wrap the result in a Response.

    Much like Django's shortcut.render()
    """
    body = render_template(request, template, context)
    return Response(body, status=status)
146
147
def redirect(request, *args, **kwargs):
    """
    Returns a HTTPFound(), takes a request and then urlgen params

    Args:
     - request: the current request; its urlgen() builds the target url
     - *args, **kwargs: passed on to request.urlgen()
     - querystring (keyword only, optional): string appended verbatim
       to the generated url.  It is never passed on to urlgen.
    """
    # Always take 'querystring' out of kwargs -- even when it is empty
    # or None -- so that it cannot end up as a urlgen() parameter.
    querystring = kwargs.pop('querystring', None)

    return exc.HTTPFound(
        location=''.join([
            request.urlgen(*args, **kwargs),
            querystring or '']))
160
161
def setup_user_in_request(request):
    """
    Set request.user from the 'user_id' stored in the session.

    request.user ends up as None when the session holds no 'user_id'
    or when no matching user is found; in the latter case the session
    is invalidated as well.
    """
    if 'user_id' not in request.session:
        request.user = None
        return

    user_id = ObjectId(request.session['user_id'])
    user = request.app.db.User.one({'_id': user_id})

    if not user:
        # The session points at a user that doesn't exist (anymore?),
        # so throw the session away.
        request.session.invalidate()

    request.user = user
181
182
def import_component(import_string):
    """
    Import the module named in import_string and return one of its
    attributes -- probably a method, class, or global variable.

    Args:
     - import_string: what to import, written in the format of
       "module1.module2:component"
    """
    module_path, component_name = import_string.split(':', 1)

    # __import__ returns the top-level package for dotted paths, so the
    # module itself is taken from sys.modules afterwards.
    __import__(module_path)
    return getattr(sys.modules[module_path], component_name)
197
# Runs of these characters separate the words of a slug
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug.  Taken from http://flask.pocoo.org/snippets/5/

    The lowercased text is split on punctuation/whitespace, every piece
    is run through the 'translit/long' codec, and the non-empty results
    are joined with delim.
    """
    pieces = (
        word.encode('translit/long')
        for word in _punct_re.split(text.lower()))
    return unicode(delim.join([piece for piece in pieces if piece]))
210
211 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
212 ### Special email test stuff begins HERE
213 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
214
215 # We have two "test inboxes" here:
216 #
217 # EMAIL_TEST_INBOX:
218 # ----------------
219 # If you're writing test views, you'll probably want to check this.
220 # It contains a list of MIMEText messages.
221 #
222 # EMAIL_TEST_MBOX_INBOX:
223 # ----------------------
224 # This collects the messages from the FakeMhost inbox. It's really
225 # just here for testing the send_email method itself.
226 #
227 # Anyway this contains:
228 # - from
229 # - to: a list of email recipient addresses
230 # - message: not just the body, but the whole message, including
231 # headers, etc.
232 #
233 # ***IMPORTANT!***
234 # ----------------
235 # Before running tests that call functions which send email, you should
236 # always call _clear_test_inboxes() to "wipe" the inboxes clean.
237
EMAIL_TEST_INBOX = []
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Stand-in for a mail host: instead of sending anything it records
    what send_email passes to it, so that tests can look at it.
    """
    def login(self, *args, **kwargs):
        # Accepts any credentials and does nothing with them
        pass

    def sendmail(self, from_addr, to_addrs, message):
        # Record the call in EMAIL_TEST_MBOX_INBOX
        record = {
            'from': from_addr,
            'to': to_addrs,
            'message': message}
        EMAIL_TEST_MBOX_INBOX.append(record)
255
def _clear_test_inboxes():
    """
    Rebind both email test inboxes to new, empty lists.
    """
    global EMAIL_TEST_INBOX
    global EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []
261
262 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
263 ### </Special email test stuff>
264 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
265
def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    When TESTS_ENABLED is set or the 'email_debug_mode' option is on,
    the message goes to a FakeMhost.  Otherwise an smtplib.SMTP
    connection is opened from the 'email_smtp_host' / 'email_smtp_port'
    options, and closed again once sending succeeded or failed.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns:
      Whatever the mail host's sendmail() returns.
    """
    app_config = mg_globals.app_config
    debug_mode = app_config['email_debug_mode']

    # Only set when we talk to a real server; that is the one thing
    # that has to be closed at the end.
    smtp_conn = None
    if TESTS_ENABLED or debug_mode:
        mhost = FakeMhost()
    else:
        mhost = smtp_conn = smtplib.SMTP(
            app_config['email_smtp_host'],
            app_config['email_smtp_port'])

    try:
        # SMTP.__init__ issues SMTP.connect implicitly only if it was
        # given a host, so for e.g. host = '' we connect explicitly.
        if smtp_conn is not None and not app_config['email_smtp_host']:
            smtp_conn.connect()

        if app_config['email_smtp_user'] or app_config['email_smtp_pass']:
            mhost.login(
                app_config['email_smtp_user'],
                app_config['email_smtp_pass'])

        message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
        message['Subject'] = subject
        message['From'] = from_addr
        message['To'] = ', '.join(to_addrs)

        if TESTS_ENABLED:
            EMAIL_TEST_INBOX.append(message)

        if debug_mode:
            # Single-argument print(...) works as statement and function
            print(u"===== Email =====")
            print(u"From address: %s" % message['From'])
            print(u"To addresses: %s" % message['To'])
            print(u"Subject: %s" % message['Subject'])
            print(u"-- Body: --")
            print(message.get_payload(decode=True))

        return mhost.sendmail(from_addr, to_addrs, message.as_string())
    finally:
        if smtp_conn is not None:
            try:
                smtp_conn.quit()
            except (smtplib.SMTPException, EnvironmentError):
                # The connection is already unusable; just drop it so
                # this doesn't hide the outcome of the send itself.
                smtp_conn.close()
311
312
313 ###################
314 # Translation tools
315 ###################
316
317
# Location of the 'i18n' resource of the mediagoblin package
TRANSLATIONS_PATH = pkg_resources.resource_filename('mediagoblin', 'i18n')
320
321
def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The part before the first '-' (or, if there is none, the first '_')
    is lowercased, the rest is uppercased, and the two are joined with
    '_'.  A locale without either separator is simply lowercased.
    """
    # '-' is looked for first, so it wins when both separators occur
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()
334
335
def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    Everything is lowercased and the first '_', if any, becomes a '-'.
    """
    return locale.lower().replace('_', '-', 1)
345
346
def get_locale_from_request(request):
    """
    Work out which language to serve this request in.

    Checked in this order: a 'lang' form parameter, a 'locale' in the
    routing matchdict, 'target_lang' in the session, the first match
    of the Accept-Language header, and finally plain 'en'.
    """
    form_data = request.GET or request.POST
    if 'lang' in form_data:
        return locale_to_lower_upper(form_data['lang'])

    browser_langs = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    route_params = request.matchdict or {}

    if 'locale' in route_params:
        chosen = route_params['locale']
    elif 'target_lang' in request.session:
        chosen = request.session['target_lang']
    elif browser_langs:
        # Pull the first acceptable language
        chosen = browser_langs[0]
    else:
        # Fall back to English
        chosen = 'en'

    return locale_to_lower_upper(chosen)
374
375
376 # A super strict version of the lxml.html cleaner class
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True,  # for now
    host_whitelist=(),
    whitelist_tags=set())


def clean_html(html):
    """
    Run html through HTML_CLEANER; anything falsy comes back as u''.
    """
    # HTML_CLEANER.clean_html barfs on an empty string, so it never
    # gets to see one
    return HTML_CLEANER.clean_html(html) if html else u''
404
405
def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags,

    Strips trailing, leading, and internal whitespace, and also converts
    the "tags" text into an array of tags

    Args:
     - tag_string: the tags as typed, separated by the configured
       'tags_delimiter'.  May be empty or None.

    Returns:
      A list of {'name': ..., 'slug': ...} dicts in order of first
      appearance, without empty or duplicate tags.
    """
    if not tag_string:
        return []

    # Collapse every run of whitespace into one space; split() also
    # drops leading and trailing whitespace.
    normalized = u' '.join(tag_string.split())

    taglist = []
    seen_names = set()  # constant-time duplicate check

    for raw_tag in normalized.split(mg_globals.app_config['tags_delimiter']):
        name = raw_tag.strip()

        # Ignore empty or duplicate tags
        if not name or name in seen_names:
            continue

        seen_names.add(name)
        taglist.append({'name': name, 'slug': slugify(name)})

    return taglist
429
430
def media_tags_as_string(media_entry_tags):
    """
    Join the names of a media item's tags (a list of dicts) into one
    string, using the configured 'tags_delimiter'.

    This is the opposite of convert_to_tag_list_of_dicts
    """
    if not media_entry_tags:
        return ''

    delimiter = mg_globals.app_config['tags_delimiter']
    return delimiter.join([entry['name'] for entry in media_entry_tags])
442
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')


def tag_length_validator(form, field):
    """
    Raise wtforms.ValidationError if any tag in field.data is longer
    than the configured 'tags_max_length'.
    """
    parsed_tags = convert_to_tag_list_of_dicts(field.data)
    offenders = [
        entry['name'] for entry in parsed_tags
        if len(entry['name']) > mg_globals.app_config['tags_max_length']]

    if not offenders:
        return

    details = (
        mg_globals.app_config['tags_max_length'],
        ', '.join(offenders))
    raise wtforms.ValidationError(TOO_LONG_TAG_WARNING % details)
459
460
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')


def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through MarkDown, and clean its HTML.

    Args:
     - text: the markdown source.  May be empty or None.

    Returns:
      The cleaned HTML, or u'' if there was no text.
    """
    # Markdown has nothing to do with an empty string, and clean_html
    # can do nothing with one either :)
    if not text:
        return u''

    # MARKDOWN_INSTANCE is shared by every call; reset() clears what the
    # previous conversion left behind (e.g. link reference definitions)
    # so that it cannot show up in this one.
    MARKDOWN_INSTANCE.reset()

    return clean_html(MARKDOWN_INSTANCE.convert(text))
473
474
# Cache of translation objects, keyed by locale
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Make the translations for this locale the active ones, i.e. hand
    them to mg_globals.setup_globals().
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    #   available, etc.
    translations = SETUP_GETTEXTS.get(locale)

    if translations is None:
        translations = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)

        # Only locales babel knows about are cached
        if exists(locale):
            SETUP_GETTEXTS[locale] = translations

    mg_globals.setup_globals(translations=translations)


# Force en to be setup before anything else so that
# mg_globals.translations is never None
setup_gettext('en')
502
503
def pass_to_ugettext(*args, **kwargs):
    """
    Call ugettext on whatever translations are current in mg_globals.

    The reason we can't have a global ugettext method is because
    mg_globals gets swapped out by the application per-request.
    """
    translate = mg_globals.translations.ugettext
    return translate(*args, **kwargs)
513
514
def lazy_pass_to_ugettext(*args, **kwargs):
    """
    Wrap a pass_to_ugettext call in a LazyProxy.

    This is useful if you have to define a translation on a module
    level but you need it to not translate until the time that it's
    used as a string.
    """
    lazy_translation = LazyProxy(pass_to_ugettext, *args, **kwargs)
    return lazy_translation
524
525
def pass_to_ngettext(*args, **kwargs):
    """
    Call ngettext on whatever translations are current in mg_globals.

    The reason we can't have a global ngettext method is because
    mg_globals gets swapped out by the application per-request.
    """
    # NOTE(review): pass_to_ugettext and the jinja gettext callables in
    # this module use the u-prefixed variants (ugettext / ungettext),
    # while this one calls plain ngettext -- confirm that is intended.
    translate_plural = mg_globals.translations.ngettext
    return translate_plural(*args, **kwargs)
535
536
def lazy_pass_to_ngettext(*args, **kwargs):
    """
    Wrap a pass_to_ngettext call in a LazyProxy.

    This is useful if you have to define a translation on a module
    level but you need it to not translate until the time that it's
    used as a string.
    """
    lazy_translation = LazyProxy(pass_to_ngettext, *args, **kwargs)
    return lazy_translation
546
547
def fake_ugettext_passthrough(string):
    """
    Return the string unchanged; a ugettext look-alike for extraction ;)

    In wtforms there's a separate way to define a method to translate
    things... so we just need to mark up the text so that it can be
    extracted, not so that it's actually run through gettext.
    """
    untranslated = string
    return untranslated
557
558
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    Pagination(page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
               jump_to_id=False),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
                 jump_to_id=False):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - cursor: db cursor
         - per_page: number of objects per page
         - jump_to_id: ObjectId, sets the page to the page containing the
           object with _id == jump_to_id.  If no such object is found
           the requested page is kept.
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()
        self.active_id = None

        if jump_to_id:
            # The search iterates over a copy, not over self.cursor itself
            for position, doc in enumerate(copy.copy(self.cursor)):
                if doc['_id'] == jump_to_id:
                    # Pages are 1-based, positions 0-based
                    self.page = 1 + int(position // self.per_page)
                    self.active_id = jump_to_id
                    break

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        offset = (self.page - 1) * self.per_page
        return self.cursor.skip(offset).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages; a partly filled last page counts."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True if there is a page before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True if there is a page after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pager, None marking a gap.

        Shown are the first left_edge pages, the last right_edge pages
        and a window around the current page: left_current pages before
        it up to right_current - 1 pages after it.
        """
        last = 0
        # range() rather than xrange(): same numbers, exists on 2 and 3
        for num in range(1, self.pages + 1):
            at_left_edge = num <= left_edge
            near_current = (num > self.page - left_current - 1 and
                            num < self.page + right_current)
            at_right_edge = num > self.pages - right_edge

            if at_left_edge or near_current or at_right_edge:
                if last + 1 != num:
                    # Numbers were skipped since the last one yielded
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        get_params is copied, not modified; it may be empty or None.
        """
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)
647
648
def gridify_list(this_list, num_cols=5):
    """
    Cut this_list into rows of num_cols items each and return the rows
    as a list of lists; the last row holds whatever is left over.
    """
    # Number of rows needed, counting a partly filled last row
    row_count = int(ceil(float(len(this_list)) / num_cols))

    return [
        this_list[row * num_cols:(row + 1) * num_cols]
        for row in range(row_count)]
668
669
def gridify_cursor(this_cursor, num_cols=5):
    """
    Read everything the cursor yields into a list and lay that list
    out with gridify_list().
    """
    items = list(this_cursor)
    return gridify_list(items, num_cols)
676
677
def render_404(request):
    """
    Render the 'mediagoblin/404.html' template as a 404 (Not Found)
    response.
    """
    # The status has to be 404; 400 would mean "Bad Request"
    return render_to_response(
        request, 'mediagoblin/404.html', {}, status=404)
684
def delete_media_files(media):
    """
    Delete all files associated with a MediaEntry

    Both the paths in media['media_files'] and the 'filepath' of every
    entry in media['attachment_files'] are deleted from the public
    store.

    Arguments:
     - media: A MediaEntry document
    """
    delete_file = mg_globals.public_store.delete_file

    # Only the stored paths matter here, not the keys they live under
    for listpath in media['media_files'].values():
        delete_file(listpath)

    for attachment in media['attachment_files']:
        delete_file(attachment['filepath'])