has_key is deprecated, converting uses to use "in" operator.
[mediagoblin.git] / mediagoblin / util.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 from __future__ import division
18
19 from email.MIMEText import MIMEText
20 import gettext
21 import pkg_resources
22 import smtplib
23 import sys
24 import re
25 import urllib
26 from math import ceil, floor
27 import copy
28 import wtforms
29
30 from babel.localedata import exists
31 from babel.support import LazyProxy
32 import jinja2
33 import translitcodec
34 from webob import Response, exc
35 from lxml.html.clean import Cleaner
36 import markdown
37 from wtforms.form import Form
38
39 from mediagoblin import mg_globals
40 from mediagoblin import messages
41 from mediagoblin.db.util import ObjectId
42
43 from itertools import izip, count
44
# Names of media file variants, listed as medium, original, thumb.
# NOTE(review): presumably the preference order used when choosing which
# variant to display -- the consumers live outside this module; confirm.
DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']

# Module-wide switch.  Helpers in this file (render_template, send_email)
# check it to decide whether to record data for the test suite.
TESTS_ENABLED = False


def _activate_testing():
    """
    Switch util.py into testing mode by setting TESTS_ENABLED to True.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True
56
57
def clear_test_buckets():
    """
    Reset the module-level state that accumulates while tests run.

    Rebinds the cached jinja environments and both email test inboxes
    to fresh, empty containers and then clears the stored template
    contexts, so the next round of tests starts from a clean slate.
    """
    global SETUP_JINJA_ENVS, EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX

    SETUP_JINJA_ENVS = {}
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []

    clear_test_template_context()
76
77
# Jinja environments that have already been built, keyed by locale.
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Return the Jinja environment for this locale, building it if needed.

    gettext is (re)configured for ``locale`` on every call.  A newly
    built environment is only cached when babel reports that the locale
    exists.

    (In the future we may have another system for providing theming;
    for now this is good enough.)
    """
    setup_gettext(locale)

    # Reuse the environment built earlier for this locale, if there is one.
    try:
        return SETUP_JINJA_ENVS[locale]
    except KeyError:
        pass

    env = jinja2.Environment(
        loader=template_loader,
        autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    env.install_gettext_callables(
        mg_globals.translations.ugettext,
        mg_globals.translations.ungettext)

    # Helpers every template gets as globals:
    #  - fetch_messages: fetch all waiting messages and remove them from
    #    the queue
    #  - gridify_list / gridify_cursor: construct a grid of thumbnails or
    #    other media
    env.globals.update(
        fetch_messages=messages.fetch_messages,
        gridify_list=gridify_list,
        gridify_cursor=gridify_cursor)

    if exists(locale):
        SETUP_JINJA_ENVS[locale] = env

    return env
114
115
# Rendering contexts captured while unit tests run, keyed by template path.
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render the template at ``template_path`` with ``context``.

    The request is always added to the context under the 'request' key
    (the passed-in dict is modified), so callers don't have to do it.
    While TESTS_ENABLED is set, the context is also remembered in
    TEMPLATE_TEST_CONTEXT for later inspection by tests.
    """
    template = request.template_env.get_template(template_path)
    context['request'] = request
    output = template.render(context)

    if TESTS_ENABLED:
        TEMPLATE_TEST_CONTEXT[template_path] = context

    return output
136
137
def clear_test_template_context():
    """
    Forget every stored template context by rebinding
    TEMPLATE_TEST_CONTEXT to a new empty dict.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()
141
142
def render_to_response(request, template, context, status=200):
    """
    Render a template and wrap the result in a Response.

    Much like Django's shortcut.render(); ``status`` becomes the status
    of the returned Response.
    """
    body = render_template(request, template, context)
    return Response(body, status=status)
148
149
def redirect(request, *args, **kwargs):
    """
    Return an HTTPFound() pointing at a URL built by request.urlgen.

    Args:
     - request: the current request; its urlgen() builds the target URL
     - *args, **kwargs: passed on to request.urlgen()
     - querystring: optional keyword argument; a string appended verbatim
       to the generated URL.  It is never passed on to urlgen().
    """
    # Always take 'querystring' out of kwargs -- even when it is empty or
    # None -- so that it can never leak into urlgen() as a routing
    # argument.
    querystring = kwargs.pop('querystring', None) or ''

    return exc.HTTPFound(
        location=''.join([
            request.urlgen(*args, **kwargs),
            querystring]))
162
163
def setup_user_in_request(request):
    """
    Attach a ``user`` attribute to the request based on its session.

    request.user is None when the session holds no 'user_id'.  Otherwise
    the user document with that id is looked up and stored; when the
    lookup finds nothing, the session is invalidated and request.user is
    left falsy.
    """
    if 'user_id' not in request.session:
        request.user = None
        return

    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user
183
184
def import_component(import_string):
    """
    Import and return a single component of a module -- probably a
    method, class, or global variable.

    Args:
     - import_string: what to import, in the format
       "module1.module2:component"
    """
    module_path, component_name = import_string.split(':', 1)
    __import__(module_path)
    # __import__ returns the top-level package for dotted paths, so pick
    # the real module out of sys.modules instead.
    return getattr(sys.modules[module_path], component_name)
199
# Runs of whitespace/punctuation that separate the words of a slug.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug.  Taken from
    http://flask.pocoo.org/snippets/5/

    The lowercased text is split on punctuation, every piece is encoded
    with the 'translit/long' codec, and the non-empty results are joined
    with ``delim``.
    """
    encoded_words = (
        piece.encode('translit/long')
        for piece in _punct_re.split(text.lower()))
    return unicode(delim.join([word for word in encoded_words if word]))
213
214 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
215 ### Special email test stuff begins HERE
216 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
217
218 # We have two "test inboxes" here:
219 #
220 # EMAIL_TEST_INBOX:
221 # ----------------
222 # If you're writing test views, you'll probably want to check this.
223 # It contains a list of MIMEText messages.
224 #
225 # EMAIL_TEST_MBOX_INBOX:
226 # ----------------------
227 # This collects the messages from the FakeMhost inbox. It's really
228 # just here for testing the send_email method itself.
229 #
230 # Anyway this contains:
231 # - from
232 # - to: a list of email recipient addresses
233 # - message: not just the body, but the whole message, including
234 # headers, etc.
235 #
236 # ***IMPORTANT!***
237 # ----------------
238 # Before running tests that call functions which send email, you should
239 # always call _clear_test_inboxes() to "wipe" the inboxes clean.
240
# MIMEText messages collected by send_email while tests are enabled.
EMAIL_TEST_INBOX = []
# Dicts ('from', 'to', 'message') collected by FakeMhost.sendmail.
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Stand-in mail host that records messages handed to it by send_email
    instead of delivering them, so they can be inspected in tests.
    """

    def login(self, *args, **kwargs):
        """Accept any credentials and do nothing."""
        pass

    def sendmail(self, from_addr, to_addrs, message):
        """Append the outgoing message to EMAIL_TEST_MBOX_INBOX."""
        record = {
            'from': from_addr,
            'to': to_addrs,
            'message': message}
        EMAIL_TEST_MBOX_INBOX.append(record)
258
259
def _clear_test_inboxes():
    """
    Wipe both email test inboxes by rebinding them to new empty lists.
    """
    global EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []
265
266 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
267 ### </Special email test stuff>
268 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
269
270
def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    A FakeMhost is used instead of a real SMTP connection whenever tests
    are enabled or the 'email_debug_mode' setting is on.  In debug mode
    the message is also printed.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns whatever the mail host's sendmail() returns.
    """
    config = mg_globals.app_config

    if TESTS_ENABLED or config['email_debug_mode']:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP(
            config['email_smtp_host'],
            config['email_smtp_port'])

        # SMTP.__init__ only issues SMTP.connect implicitly when a host
        # is given, so with an empty host (e.g. '') connect explicitly.
        if not config['email_smtp_host']:
            mhost.connect()

    if config['email_smtp_user'] or config['email_smtp_pass']:
        mhost.login(
            config['email_smtp_user'],
            config['email_smtp_pass'])

    message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
    message['Subject'] = subject
    message['From'] = from_addr
    message['To'] = ', '.join(to_addrs)

    if TESTS_ENABLED:
        EMAIL_TEST_INBOX.append(message)

    if config['email_debug_mode']:
        # Single-argument parenthesised form: prints exactly like the
        # plain print statement.
        print(u"===== Email =====")
        print(u"From address: %s" % message['From'])
        print(u"To addresses: %s" % message['To'])
        print(u"Subject: %s" % message['Subject'])
        print(u"-- Body: --")
        print(message.get_payload(decode=True))

    return mhost.sendmail(from_addr, to_addrs, message.as_string())
316
317
318 ###################
319 # Translation tools
320 ###################
321
322
# Filesystem path of the 'i18n' resource of the mediagoblin package, as
# resolved by pkg_resources.
TRANSLATIONS_PATH = pkg_resources.resource_filename('mediagoblin', 'i18n')
325
326
def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US".

    The locale is split once on the first '-' if it contains one,
    otherwise on the first '_'.  The language part is lowercased, the
    remainder uppercased, and the two are joined with an underscore.
    A locale without either separator is simply lowercased.
    """
    # '-' is tried before '_' so mixed input keeps splitting on the dash.
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()
339
340
def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us".

    An underscore-separated locale is split on its first '_' and rejoined
    with a dash; everything is lowercased.  Input without an underscore
    (including already dash-separated locales) is just lowercased.
    """
    lang, separator, country = locale.partition('_')
    if not separator:
        return locale.lower()

    return '%s-%s' % (lang.lower(), country.lower())
350
351
def get_locale_from_request(request):
    """
    Work out which target language suits this request best.

    Sources are consulted in this order: an explicit 'lang' form
    parameter, a 'locale' in the routing matchdict, a 'target_lang'
    stored in the session, the first match of the Accept-Language
    header, and finally English.  The result is normalised with
    locale_to_lower_upper().
    """
    form = request.GET or request.POST
    if 'lang' in form:
        return locale_to_lower_upper(form['lang'])

    accepted_languages = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    routing_args = request.matchdict or {}

    if 'locale' in routing_args:
        chosen = routing_args['locale']
    elif 'target_lang' in request.session:
        chosen = request.session['target_lang']
    elif accepted_languages:
        # Pull the first acceptable language
        chosen = accepted_languages[0]
    else:
        # Fall back to English
        chosen = 'en'

    return locale_to_lower_upper(chosen)
379
380
# A super strict version of the lxml.html cleaner class: every removal
# switch is turned on and only a short list of tags is allowed.
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    # remove_unknown_tags can't be used together with allow_tags
    remove_unknown_tags=False,
    safe_attrs_only=True,
    # for now
    add_nofollow=True,
    host_whitelist=(),
    whitelist_tags=set())
401
402
def clean_html(html):
    """
    Run ``html`` through HTML_CLEANER and return the result.

    Empty input yields u'' without calling the cleaner, because the
    cleaner barfs on an empty string.
    """
    if html:
        return HTML_CLEANER.clean_html(html)

    return u''
409
410
def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags.

    Strips trailing, leading, and internal whitespace, and also converts
    the "tags" text into a list of tags.  The string is split on the
    configured 'tags_delimiter'; empty and duplicate tags are ignored
    and the first-seen order is kept.

    Returns a list of dicts with the keys 'name' (the stripped tag) and
    'slug' (the slugified tag).  Empty input gives an empty list.
    """
    taglist = []
    if not tag_string:
        return taglist

    # split() without arguments drops leading/trailing whitespace and
    # collapses internal runs, so re-joining leaves single spaces only.
    normalized = u' '.join(tag_string.split())

    # Names already emitted; a set keeps the duplicate check O(1) instead
    # of rebuilding a list of names for every tag.
    seen_names = set()

    for raw_tag in normalized.split(mg_globals.app_config['tags_delimiter']):
        name = raw_tag.strip()

        # Ignore empty or duplicate tags
        if not name or name in seen_names:
            continue

        seen_names.add(name)
        taglist.append({'name': name, 'slug': slugify(name)})

    return taglist
434
435
def media_tags_as_string(media_entry_tags):
    """
    Build a single string from a media item's tags (a list of dicts).

    The 'name' of every tag is joined with the configured
    'tags_delimiter'.  No tags gives an empty string.

    This is the opposite of convert_to_tag_list_of_dicts
    """
    if not media_entry_tags:
        return ''

    names = [entry['name'] for entry in media_entry_tags]
    return mg_globals.app_config['tags_delimiter'].join(names)
447
# Message template: first the length limit, then the offending tags.
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')


def tag_length_validator(form, field):
    """
    Form field validator: make sure no tag exceeds the maximum tag length.

    Raises wtforms.ValidationError naming every tag in ``field.data``
    that is longer than the configured 'tags_max_length'.
    """
    names = [
        entry['name']
        for entry in convert_to_tag_list_of_dicts(field.data)]
    offenders = [
        name for name in names
        if len(name) > mg_globals.app_config['tags_max_length']]

    if not offenders:
        return

    details = (
        mg_globals.app_config['tags_max_length'], ', '.join(offenders))
    raise wtforms.ValidationError(TOO_LONG_TAG_WARNING % details)
465
466
# Shared converter, created with safe_mode='escape'.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')


def cleaned_markdown_conversion(text):
    """
    Convert a block of text with Markdown and clean the resulting HTML.

    Empty input short-circuits to u'': Markdown will do nothing with an
    empty string and clean_html can do nothing with one either.
    """
    if text:
        converted = MARKDOWN_INSTANCE.convert(text)
        return clean_html(converted)

    return u''
480
481
# gettext translation objects that have already been loaded, by locale.
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Install the gettext translations for ``locale`` into mg_globals.

    Translations are loaded from TRANSLATIONS_PATH with fallback enabled
    and are cached only when babel reports that the locale exists.
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if locale not in SETUP_GETTEXTS:
        translations = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        if exists(locale):
            SETUP_GETTEXTS[locale] = translations
    else:
        translations = SETUP_GETTEXTS[locale]

    mg_globals.setup_globals(translations=translations)


# Force en to be setup before anything else so that
# mg_globals.translations is never None
setup_gettext('en')
510
511
def pass_to_ugettext(*args, **kwargs):
    """
    Forward a translation call to the current ugettext method.

    The method is looked up on mg_globals at call time: we can't keep a
    global ugettext because mg_globals gets swapped out by the
    application per-request.
    """
    translate = mg_globals.translations.ugettext
    return translate(*args, **kwargs)
521
522
def lazy_pass_to_ugettext(*args, **kwargs):
    """
    Return a LazyProxy that calls pass_to_ugettext with these arguments.

    Handy when a translation has to be defined at module level but must
    not actually be translated until it's used as a string.
    """
    lazy_translation = LazyProxy(pass_to_ugettext, *args, **kwargs)
    return lazy_translation
532
533
def pass_to_ngettext(*args, **kwargs):
    """
    Forward a translation call to the current ngettext method.

    The method is looked up on mg_globals at call time: we can't keep a
    global ngettext because mg_globals gets swapped out by the
    application per-request.
    """
    translate_plural = mg_globals.translations.ngettext
    return translate_plural(*args, **kwargs)
543
544
def lazy_pass_to_ngettext(*args, **kwargs):
    """
    Return a LazyProxy that calls pass_to_ngettext with these arguments.

    Handy when a translation has to be defined at module level but must
    not actually be translated until it's used as a string.
    """
    lazy_translation = LazyProxy(pass_to_ngettext, *args, **kwargs)
    return lazy_translation
554
555
def fake_ugettext_passthrough(string):
    """
    Return ``string`` unchanged; a fake ugettext for extraction's sake ;)

    wtforms has its own way of defining how things get translated, so
    text only needs to be marked up for extraction here -- it must not
    actually be run through gettext.
    """
    untranslated = string
    return untranslated
565
566
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
             jump_to_id=False),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
                 jump_to_id=False):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - per_page: number of objects per page
         - cursor: db cursor
         - jump_to_id: ObjectId, sets the page to the page containing the
           object with _id == jump_to_id.
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()
        self.active_id = None

        if jump_to_id:
            # Walk a copy so that self.cursor itself is not iterated.
            for position, doc in enumerate(copy.copy(self.cursor)):
                if doc['_id'] == jump_to_id:
                    # Zero-based position -> one-based page number.
                    self.page = 1 + int(position // self.per_page)
                    self.active_id = jump_to_id
                    break

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        offset = (self.page - 1) * self.per_page
        return self.cursor.skip(offset).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages, rounding a partial last page up."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True when there is a page before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True when there is a page after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pager, in ascending order.

        Pages are shown when they are among the first ``left_edge`` or
        last ``right_edge`` pages, or inside the window around the
        current page (``left_current`` before it, up to but excluding
        ``right_current`` after it).  A single None is yielded wherever
        page numbers were skipped.
        """
        total = self.pages
        window_start = self.page - left_current - 1
        window_end = self.page + right_current

        last = 0
        for num in range(1, total + 1):
            near_edge = num <= left_edge or num > total - right_edge
            near_current = window_start < num < window_end
            if not (near_edge or near_current):
                continue

            if last + 1 != num:
                # Mark the gap since the previously yielded page.
                yield None
            yield num
            last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        ``get_params`` is copied, never modified; it may be None or empty.
        """
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based on the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)
653
654
def gridify_list(this_list, num_cols=5):
    """
    Chop a list into rows of at most ``num_cols`` items each.

    Returns a list of slices of ``this_list``; every row is full except
    possibly the last one.  An empty list gives an empty grid.
    """
    # Figure out how many rows we should have
    row_total = int(ceil(float(len(this_list)) / num_cols))

    return [
        this_list[index * num_cols:(index + 1) * num_cols]
        for index in range(row_total)]
674
675
def gridify_cursor(this_cursor, num_cols=5):
    """
    Read every item from the cursor into a list and arrange it in rows
    of at most ``num_cols`` items, exactly as gridify_list() does.
    """
    items = list(this_cursor)
    return gridify_list(items, num_cols)
682
683
def render_404(request):
    """
    Render the 'mediagoblin/404.html' template as a response with HTTP
    status 404 (Not Found).
    """
    # The status has to be 404; the previous value of 400 told clients
    # the request was malformed rather than that the page is missing.
    return render_to_response(
        request, 'mediagoblin/404.html', {}, status=404)
690
691
def delete_media_files(media):
    """
    Delete all files associated with a MediaEntry

    Every path stored in media['media_files'] and the 'filepath' of
    every entry in media['attachment_files'] is deleted from
    mg_globals.public_store.

    Arguments:
     - media: A MediaEntry document
    """
    for media_filepath in media['media_files'].values():
        mg_globals.public_store.delete_file(media_filepath)

    for attachment_entry in media['attachment_files']:
        mg_globals.public_store.delete_file(attachment_entry['filepath'])