Logout -> log out
[mediagoblin.git] / mediagoblin / util.py
CommitLineData
8e1e744d 1# GNU MediaGoblin -- federated, autonomous media hosting
e5572c60
ML
2# Copyright (C) 2011 Free Software Foundation, Inc
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
af2fcba5
JW
17from __future__ import division
18
4d4f6050 19from email.MIMEText import MIMEText
b77eec65
CAW
20import gettext
21import pkg_resources
4d4f6050 22import smtplib
cb8ea0fe 23import sys
0546833c 24import re
c5678c1a 25import urllib
af2fcba5 26from math import ceil, floor
c5678c1a 27import copy
909371cd 28import wtforms
c5678c1a 29
f99f61c6 30from babel.localedata import exists
1c266dc3 31from babel.support import LazyProxy
31a8ff42 32import jinja2
0546833c 33import translitcodec
9150244a 34from webob import Response, exc
a68ee555 35from lxml.html.clean import Cleaner
4bf8e888 36import markdown
1c266dc3 37from wtforms.form import Form
31a8ff42 38
6e7ce8d1 39from mediagoblin import mg_globals
22646703 40from mediagoblin import messages
c5678c1a 41from mediagoblin.db.util import ObjectId
29f3fb70 42
af2fcba5
JW
43from itertools import izip, count
44
2c9e635a
JW
45DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']
46
4d4f6050
CAW
# Testing-mode switch for this module; stays False until
# _activate_testing() is called.
TESTS_ENABLED = False


def _activate_testing():
    """
    Turn on testing mode in util.py by setting TESTS_ENABLED to True.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True
54
55
66471f0e
CAW
def clear_test_buckets():
    """
    Give the next round of tests a "clean slate".

    Rebinds the cached jinja environments and both email test inboxes
    to fresh, empty containers, then clears the template contexts that
    were captured while rendering.
    """
    global SETUP_JINJA_ENVS, EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX

    SETUP_JINJA_ENVS = {}
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []

    clear_test_template_context()
74
75
f99f61c6
CAW
# Jinja environments that have already been built, keyed by locale.
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Set up the Jinja environment for a locale, reusing a cached
    environment when one exists.

    (In the future we may have another system for providing theming;
    for now this is good enough.)

    Args:
     - template_loader: loader passed to a newly built environment.
       It is ignored when a cached environment for this locale is
       returned.
     - locale: locale passed to setup_gettext() and used as cache key

    Returns the jinja2 environment for this locale.
    """
    # This runs even on a cache hit: it installs the translations for
    # this locale into mg_globals.
    setup_gettext(locale)

    # If we have a jinja environment set up with this locale, just
    # return that one.
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader, autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    template_env.install_gettext_callables(
        mg_globals.translations.ugettext,
        mg_globals.translations.ungettext)

    # All templates will know how to ...
    # ... fetch all waiting messages and remove them from the queue
    # ... construct a grid of thumbnails or other media
    template_env.globals['fetch_messages'] = messages.fetch_messages
    template_env.globals['gridify_list'] = gridify_list
    template_env.globals['gridify_cursor'] = gridify_cursor

    # Only cache environments for locales babel knows about
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env
112
58dec5ef 113
e9279f21
CAW
# Contexts of rendered templates are kept here, keyed by template path,
# while unit tests are running
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Fetch template_path from the request's template environment and
    render it with context.

    The request is always stored in the context under 'request' (the
    passed-in context is modified), so you don't have to.  When tests
    are enabled, the context is also recorded in TEMPLATE_TEST_CONTEXT
    under template_path.  Returns the rendered template.
    """
    chosen_template = request.template_env.get_template(template_path)
    context['request'] = request
    output = chosen_template.render(context)

    if TESTS_ENABLED:
        TEMPLATE_TEST_CONTEXT[template_path] = context

    return output
134
135
def clear_test_template_context():
    """
    Rebind TEMPLATE_TEST_CONTEXT to a fresh, empty dict.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()
139
140
1c63ad5d
E
def render_to_response(request, template, context):
    """
    Render template with context and wrap the result in a Response.

    Much like Django's shortcut.render()
    """
    rendered_body = render_template(request, template, context)
    return Response(rendered_body)
144
145
9150244a
E
def redirect(request, *args, **kwargs):
    """
    Returns a HTTPFound(), takes a request and then urlgen params.

    A 'querystring' keyword argument is never forwarded to
    request.urlgen(); when it has a non-empty value, that value is
    appended as-is to the generated URL.
    """
    # pop() rather than get()+del: the key must not reach urlgen even
    # when its value is empty or None.
    querystring = kwargs.pop('querystring', None)

    return exc.HTTPFound(
        location=''.join([
            request.urlgen(*args, **kwargs),
            querystring if querystring else '']))
9150244a
E
158
159
58dec5ef
CAW
def setup_user_in_request(request):
    """
    Examine a request and tack on a request.user parameter if that's
    appropriate.

    Without a 'user_id' in the session, request.user is set to None.
    Otherwise request.user is set to the result of looking that id up
    in the database; if the lookup finds nothing, the session is
    invalidated as well.
    """
    if 'user_id' not in request.session:
        request.user = None
        return

    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user
cb8ea0fe
CAW
179
180
def import_component(import_string):
    """
    Import a module and return one component of it.  Probably a
    method, class, or global variable.

    Args:
     - import_string: a string that defines what to import.  Written
       in the format of "module1.module2:component"
    """
    module_path, component_name = import_string.split(':', 1)
    # __import__ on a dotted path returns the top-level package, so the
    # actual module is looked up in sys.modules afterwards.
    __import__(module_path)
    return getattr(sys.modules[module_path], component_name)
4d4f6050 195
0546833c
AW
# One or more whitespace/punctuation characters; used to split text
# into the words that make up a slug.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/

    The lowercased text is split on _punct_re, every piece is encoded
    with the 'translit/long' codec, and the non-empty results are
    joined with delim.
    """
    encoded_words = [
        piece.encode('translit/long')
        for piece in _punct_re.split(text.lower())]
    return unicode(delim.join([word for word in encoded_words if word]))
4d4f6050
CAW
208
209### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
210### Special email test stuff begins HERE
211### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
212
213# We have two "test inboxes" here:
214#
215# EMAIL_TEST_INBOX:
216# ----------------
217# If you're writing test views, you'll probably want to check this.
218# It contains a list of MIMEText messages.
219#
220# EMAIL_TEST_MBOX_INBOX:
221# ----------------------
222# This collects the messages from the FakeMhost inbox. It's really
223# just here for testing the send_email method itself.
224#
225# Anyway this contains:
226# - from
227# - to: a list of email recipient addresses
228# - message: not just the body, but the whole message, including
229# headers, etc.
230#
231# ***IMPORTANT!***
232# ----------------
233# Before running tests that call functions which send email, you should
234# always call _clear_test_inboxes() to "wipe" the inboxes clean.
235
EMAIL_TEST_INBOX = []
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    A stand-in mail host: instead of delivering anything it records
    what send_email hands over, so the messages can be inspected.
    """
    def connect(self):
        """Nothing to connect to; intentionally does nothing."""
        return None

    def sendmail(self, from_addr, to_addrs, message):
        """
        Append a dict with the keys 'from', 'to' and 'message' to
        EMAIL_TEST_MBOX_INBOX.
        """
        captured = {}
        captured['from'] = from_addr
        captured['to'] = to_addrs
        captured['message'] = message
        EMAIL_TEST_MBOX_INBOX.append(captured)
253
def _clear_test_inboxes():
    """
    Rebind both email test inboxes to fresh, empty lists.
    """
    global EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []
259
260### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
261### </Special email test stuff>
262### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
263
def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    When tests are enabled or the 'email_debug_mode' setting is on, a
    FakeMhost is used instead of a real SMTP connection.  In debug
    mode the message is also printed.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns whatever the mail host's sendmail() returns.
    """
    debug_mode = mg_globals.app_config['email_debug_mode']

    use_fake_host = TESTS_ENABLED or debug_mode
    if use_fake_host:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP()

    mhost.connect()

    try:
        message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
        message['Subject'] = subject
        message['From'] = from_addr
        message['To'] = ', '.join(to_addrs)

        if TESTS_ENABLED:
            EMAIL_TEST_INBOX.append(message)

        if debug_mode:
            # Single-argument parenthesised prints behave the same as
            # print statements on Python 2 and are valid on Python 3.
            print(u"===== Email =====")
            print(u"From address: %s" % message['From'])
            print(u"To addresses: %s" % message['To'])
            print(u"Subject: %s" % message['Subject'])
            print(u"-- Body: --")
            print(message.get_payload(decode=True))

        return mhost.sendmail(from_addr, to_addrs, message.as_string())
    finally:
        if not use_fake_host:
            # Don't leak the SMTP connection.  If a polite QUIT is not
            # possible any more, just drop the socket so that the
            # error from sendmail (if any) is not masked.
            try:
                mhost.quit()
            except smtplib.SMTPException:
                mhost.close()
20c834ff 300
8b28bee4
CAW
301
302###################
303# Translation tools
304###################
305
306
# Location of the 'i18n' resource inside the mediagoblin package
TRANSLATIONS_PATH = pkg_resources.resource_filename('mediagoblin', 'i18n')
b77eec65
CAW
309
310
8b28bee4
CAW
def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The locale is split at the first '-' (or, if there is none, at the
    first '_'); the language part is lowercased and the rest is
    uppercased.  A locale without separator is simply lowercased.
    """
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()
323
324
def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    Everything is lowercased and the first '_' (if any) becomes a '-'.
    """
    return locale.lower().replace('_', '-', 1)
334
335
def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request

    Checked in this order:
     - a 'lang' parameter in request.GET (or in request.POST when GET
       is empty)
     - a 'locale' entry in the routing matchdict
     - a 'target_lang' entry in the session
     - the first of the request's accepted languages
     - 'en' as the fallback

    The result is passed through locale_to_lower_upper().
    """
    request_form = request.GET or request.POST

    if 'lang' in request_form:
        return locale_to_lower_upper(request_form['lang'])

    accept_lang_matches = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    if 'locale' in request.matchdict:
        target_lang = request.matchdict['locale']
    elif 'target_lang' in request.session:
        target_lang = request.session['target_lang']
    # Pull the first acceptable language
    elif accept_lang_matches:
        target_lang = accept_lang_matches[0]
    # Fall back to English
    else:
        target_lang = 'en'

    return locale_to_lower_upper(target_lang)
b77eec65
CAW
361
362
a68ee555
CAW
# A super strict version of the lxml.html cleaner class
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True,  # for now
    host_whitelist=(),
    whitelist_tags=set())


def clean_html(html):
    """
    Run html through HTML_CLEANER and return the result.

    Empty input gives u'' without touching the cleaner, because
    clean_html barfs on an empty string.
    """
    return HTML_CLEANER.clean_html(html) if html else u''
391
392
def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags,

    Strips leading and trailing whitespace, collapses internal runs of
    whitespace to a single space, and converts the "tags" text into a
    list of dicts, each with the tag's 'name' and its 'slug'.  Empty
    and duplicate tags are dropped; the first occurrence wins.
    """
    taglist = []
    if not tag_string:
        return taglist

    # Strip out internal, trailing, and leading whitespace
    stripped_tag_string = u' '.join(tag_string.strip().split())

    # Names already added, for a cheap duplicate check
    seen_names = set()

    # Split the tag string into a list of tags
    for tag in stripped_tag_string.split(
            mg_globals.app_config['tags_delimiter']):
        name = tag.strip()

        # Ignore empty or duplicate tags
        if name and name not in seen_names:
            seen_names.add(name)
            taglist.append({'name': name,
                            'slug': slugify(name)})

    return taglist
cdf538bd
CFD
416
417
0712a06d
CFD
def media_tags_as_string(media_entry_tags):
    """
    Join the 'name' of every tag dict with the configured tag
    delimiter; returns '' when there are no tags.

    This is the opposite of convert_to_tag_list_of_dicts
    """
    if not media_entry_tags:
        return ''

    delimiter = mg_globals.app_config['tags_delimiter']
    return delimiter.join([entry['name'] for entry in media_entry_tags])
429
909371cd
CFD
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')


def tag_length_validator(form, field):
    """
    Form validator: raise a wtforms.ValidationError listing every tag
    in field.data that is longer than the configured maximum length.
    """
    names = [
        tag['name'] for tag in convert_to_tag_list_of_dicts(field.data)]
    if not names:
        return

    max_length = mg_globals.app_config['tags_max_length']
    offenders = []
    for name in names:
        if len(name) > max_length:
            offenders.append(name)

    if offenders:
        raise wtforms.ValidationError(
            TOO_LONG_TAG_WARNING % (max_length, ', '.join(offenders)))
4bf8e888
CAW
446
447
# One Markdown converter shared by every conversion in this module
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')


def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through MarkDown, and clean its HTML.

    Returns u'' for empty input.
    """
    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string :)
    if not text:
        return u''

    # The converter is reused across calls, so reset it first; this
    # keeps per-document state (such as link reference definitions)
    # from carrying over from a previously converted text.
    MARKDOWN_INSTANCE.reset()

    return clean_html(MARKDOWN_INSTANCE.convert(text))
460
461
f99f61c6
CAW
# Translation objects that have already been loaded, keyed by locale.
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    The translation object for the locale is loaded (or taken from
    SETUP_GETTEXTS) and installed as mg_globals.translations.
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        # Only cache translations for locales babel knows about
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)


# Force en to be setup before anything else so that
# mg_globals.translations is never None
setup_gettext('en')
489
490
def pass_to_ugettext(*args, **kwargs):
    """
    Forward the call to ugettext of the current translations.

    The translations are looked up on mg_globals at call time: we
    can't have a global ugettext method because mg_globals gets
    swapped out by the application per-request.
    """
    current_translations = mg_globals.translations
    return current_translations.ugettext(*args, **kwargs)
500
501
1c266dc3
CAW
def lazy_pass_to_ugettext(*args, **kwargs):
    """
    Wrap a pass_to_ugettext call in a LazyProxy.

    This is useful if you have to define a translation on a module
    level but you need it to not translate until the time that it's
    used as a string.
    """
    deferred = LazyProxy(pass_to_ugettext, *args, **kwargs)
    return deferred
511
512
def pass_to_ngettext(*args, **kwargs):
    """
    Pass a plural translation on to the appropriate ungettext method.

    The unicode variant is used so that the result matches what
    pass_to_ugettext() and the template gettext callables return,
    instead of an encoded byte string.

    The reason we can't have a global ngettext method is because
    mg_globals gets swapped out by the application per-request.
    """
    return mg_globals.translations.ungettext(
        *args, **kwargs)
522
523
def lazy_pass_to_ngettext(*args, **kwargs):
    """
    Wrap a pass_to_ngettext call in a LazyProxy.

    This is useful if you have to define a translation on a module
    level but you need it to not translate until the time that it's
    used as a string.
    """
    deferred = LazyProxy(pass_to_ngettext, *args, **kwargs)
    return deferred
533
534
def fake_ugettext_passthrough(string):
    """
    Return string unchanged; a fake ugettext for extraction's sake ;)

    In wtforms there's a separate way to define a method to translate
    things... so the text only needs to be marked up so that it can be
    extracted, it must not actually be run through gettext here.
    """
    untranslated = string
    return untranslated
544
545
b9e9610b
CAW
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
             jump_to_id=False),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
                 jump_to_id=False):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - per_page: number of objects per page
         - cursor: db cursor
         - jump_to_id: ObjectId, sets the page to the page containing the
           object with _id == jump_to_id.  If no such object is found the
           requested page is kept.
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()
        self.active_id = None

        if jump_to_id:
            # Search through a copy instead of iterating self.cursor
            # itself.
            for position, doc in enumerate(copy.copy(self.cursor)):
                if doc['_id'] == jump_to_id:
                    self.page = 1 + int(position // self.per_page)
                    self.active_id = jump_to_id
                    break

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages, rounded up."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True if there is a page before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True if there is a page after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a page navigation.

        Numbers are yielded for the first left_edge pages, the last
        right_edge pages, and a window around the current page
        (left_current pages before it up to right_current - 1 pages
        after it).  A single None is yielded wherever page numbers
        were skipped.
        """
        total_pages = self.pages
        window_start = self.page - left_current - 1
        window_end = self.page + right_current

        last = 0
        for num in range(1, total_pages + 1):
            if (num <= left_edge
                    or window_start < num < window_end
                    or num > total_pages - right_edge):
                if last + 1 != num:
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        get_params is copied before 'page' is set on it, so the
        mapping that was passed in is left untouched.
        """
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)
b5017dba
CAW
634
635
def gridify_list(this_list, num_cols=5):
    """
    Cut this_list into consecutive rows of at most num_cols items and
    return the rows as a list of lists (only the last row may be
    shorter).
    """
    # Figure out how many rows we should have
    row_total = int(ceil(float(len(this_list)) / num_cols))

    return [
        this_list[row_index * num_cols:(row_index + 1) * num_cols]
        for row_index in range(row_total)]
655
656
def gridify_cursor(this_cursor, num_cols=5):
    """
    Read everything from this_cursor into a list and arrange it with
    gridify_list(), i.e. as a list of rows of at most num_cols items.
    """
    fetched_items = list(this_cursor)
    return gridify_list(fetched_items, num_cols)