Feature #571 - Closing storage objects - Removed closing(), renamed
[mediagoblin.git] / mediagoblin / util.py
CommitLineData
8e1e744d 1# GNU MediaGoblin -- federated, autonomous media hosting
12a100e4 2# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
e5572c60
ML
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
af2fcba5
JW
17from __future__ import division
18
4d4f6050 19from email.MIMEText import MIMEText
b77eec65
CAW
20import gettext
21import pkg_resources
4d4f6050 22import smtplib
cb8ea0fe 23import sys
0546833c 24import re
c5678c1a 25import urllib
af2fcba5 26from math import ceil, floor
c5678c1a 27import copy
909371cd 28import wtforms
c5678c1a 29
f99f61c6 30from babel.localedata import exists
1c266dc3 31from babel.support import LazyProxy
31a8ff42 32import jinja2
0546833c 33import translitcodec
9150244a 34from webob import Response, exc
a68ee555 35from lxml.html.clean import Cleaner
4bf8e888 36import markdown
1c266dc3 37from wtforms.form import Form
31a8ff42 38
6e7ce8d1 39from mediagoblin import mg_globals
22646703 40from mediagoblin import messages
c5678c1a 41from mediagoblin.db.util import ObjectId
29f3fb70 42
af2fcba5
JW
43from itertools import izip, count
44
2c9e635a
JW
45DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']
46
4d4f6050
CAW
# Module-wide switch checked by render_template() and send_email().
TESTS_ENABLED = False


def _activate_testing():
    """
    Turn on testing mode for util.py by setting TESTS_ENABLED to True.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True
54
55
66471f0e
CAW
def clear_test_buckets():
    """
    Give the next round of tests a "clean slate".

    Rebinds the cached jinja environments and both email test inboxes
    to fresh empty containers, then clears the stored template test
    context via clear_test_template_context().
    """
    global SETUP_JINJA_ENVS, EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX

    # Things we might have redefined during testing, like the jinja envs
    SETUP_JINJA_ENVS = {}

    # Captured test email
    EMAIL_TEST_INBOX = []
    EMAIL_TEST_MBOX_INBOX = []

    clear_test_template_context()
74
75
f99f61c6
CAW
# Cache of already-built jinja environments, keyed by locale.
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Set up the Jinja environment,

    (In the future we may have another system for providing theming;
    for now this is good enough.)

    Args:
     - template_loader: loader handed to jinja2.Environment
     - locale: locale used to set up gettext and as the cache key

    Returns the jinja2 environment for this locale.  Environments are
    only cached when babel reports that the locale exists.
    """
    # Always (re)activate the translations for this locale, even when
    # the environment itself comes out of the cache.
    setup_gettext(locale)

    # If we have a jinja environment set up with this locale, just
    # return that one.
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader, autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    template_env.install_gettext_callables(
        mg_globals.translations.ugettext,
        mg_globals.translations.ungettext)

    # All templates will know how to ...
    # ... fetch all waiting messages and remove them from the queue
    # ... construct a grid of thumbnails or other media
    template_env.globals['fetch_messages'] = messages.fetch_messages
    template_env.globals['gridify_list'] = gridify_list
    template_env.globals['gridify_cursor'] = gridify_cursor

    # Only cache environments for locales babel knows about
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env
112
58dec5ef 113
e9279f21
CAW
114# We'll store context information here when doing unit tests
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render the template at template_path with the given context.

    The request is always added to the context under 'request', so
    callers don't have to do that themselves.  When TESTS_ENABLED is
    set, the context is also remembered in TEMPLATE_TEST_CONTEXT,
    keyed by template path.
    """
    template = request.template_env.get_template(template_path)

    context['request'] = request
    output = template.render(context)

    if TESTS_ENABLED:
        TEMPLATE_TEST_CONTEXT[template_path] = context

    return output
134
135
def clear_test_template_context():
    """
    Throw away every template context stored during tests.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()
139
140
def render_to_response(request, template, context, status=200):
    """
    Much like Django's shortcut.render()

    Renders the template through render_template() and wraps the
    result in a Response with the given status.
    """
    body = render_template(request, template, context)
    return Response(body, status=status)
1c63ad5d
E
146
147
9150244a
E
def redirect(request, *args, **kwargs):
    """
    Returns a HTTPFound(), takes a request and then urlgen params

    A 'querystring' keyword argument, if given, is not passed on to
    urlgen; it is appended verbatim to the generated URL instead.
    """
    # Always remove 'querystring' from kwargs -- even when it is empty
    # or None -- so it can never leak into urlgen as a routing
    # parameter.
    querystring = kwargs.pop('querystring', None) or ''

    return exc.HTTPFound(
        location=''.join([
            request.urlgen(*args, **kwargs),
            querystring]))
9150244a
E
160
161
58dec5ef
CAW
def setup_user_in_request(request):
    """
    Examine a request and tack on a request.user parameter if that's
    appropriate.

    request.user is set to None when the session carries no 'user_id',
    otherwise to whatever the User lookup returns.  When the lookup
    finds nothing the session is invalidated.
    """
    if 'user_id' not in request.session:
        request.user = None
        return

    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user
cb8ea0fe
CAW
181
182
def import_component(import_string):
    """
    Import and return the module component named by import_string.
    Probably a method, class, or global variable.

    Args:
     - import_string: a string that defines what to import.  Written
       in the format of "module1.module2:component"
    """
    module_path, component_name = import_string.split(':', 1)

    # __import__ returns the top-level package, so fetch the actual
    # (sub)module from sys.modules instead.
    __import__(module_path)
    return getattr(sys.modules[module_path], component_name)
4d4f6050 197
0546833c
AW
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/

    The lowercased text is split on punctuation/whitespace, every piece
    is transliterated with the 'translit/long' codec, empty pieces are
    dropped and the rest is joined with delim.
    """
    pieces = [
        word.encode('translit/long')
        for word in _punct_re.split(text.lower())]
    return unicode(delim.join([piece for piece in pieces if piece]))
4d4f6050
CAW
210
211### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
212### Special email test stuff begins HERE
213### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
214
215# We have two "test inboxes" here:
216#
217# EMAIL_TEST_INBOX:
218# ----------------
219# If you're writing test views, you'll probably want to check this.
220# It contains a list of MIMEText messages.
221#
222# EMAIL_TEST_MBOX_INBOX:
223# ----------------------
224# This collects the messages from the FakeMhost inbox. It's really
225# just here for testing the send_email method itself.
226#
227# Anyway this contains:
228# - from
229# - to: a list of email recipient addresses
230# - message: not just the body, but the whole message, including
231# headers, etc.
232#
233# ***IMPORTANT!***
234# ----------------
235# Before running tests that call functions which send email, you should
236# always call _clear_test_inboxes() to "wipe" the inboxes clean.
237
EMAIL_TEST_INBOX = []
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Stand-in mail host: instead of delivering anything it records the
    messages handed to it by send_email so tests can inspect them.
    """
    def login(self, *args, **kwargs):
        # Nothing to authenticate against; accept anything.
        return None

    def sendmail(self, from_addr, to_addrs, message):
        # Record the message in the mbox-style test inbox.
        captured = {
            'from': from_addr,
            'to': to_addrs,
            'message': message}
        EMAIL_TEST_MBOX_INBOX.append(captured)
255
def _clear_test_inboxes():
    """
    Wipe both email test inboxes by rebinding them to new empty lists.
    """
    global EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []
261
262### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
263### </Special email test stuff>
264### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
265
def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    When TESTS_ENABLED is set or the 'email_debug_mode' config option
    is on, a FakeMhost is used and nothing leaves the machine;
    otherwise the message goes through smtplib using the
    'email_smtp_*' config options.  In debug mode the message is also
    printed.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns whatever the mail host's sendmail() returns.
    """
    debug_mode = mg_globals.app_config['email_debug_mode']
    use_fake_host = TESTS_ENABLED or debug_mode

    if use_fake_host:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP(
            mg_globals.app_config['email_smtp_host'],
            mg_globals.app_config['email_smtp_port'])

    try:
        if not use_fake_host:
            # SMTP.__init__ issues SMTP.connect implicitly if host is
            # given, so only connect explicitly when it is empty
            # (e.g. host = '')
            if not mg_globals.app_config['email_smtp_host']:
                mhost.connect()

            if mg_globals.app_config['email_smtp_user'] \
                    or mg_globals.app_config['email_smtp_pass']:
                mhost.login(
                    mg_globals.app_config['email_smtp_user'],
                    mg_globals.app_config['email_smtp_pass'])

        message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
        message['Subject'] = subject
        message['From'] = from_addr
        message['To'] = ', '.join(to_addrs)

        if TESTS_ENABLED:
            EMAIL_TEST_INBOX.append(message)

        if debug_mode:
            # Single-argument print(...) behaves the same as the print
            # statement here.
            print(u"===== Email =====")
            print(u"From address: %s" % message['From'])
            print(u"To addresses: %s" % message['To'])
            print(u"Subject: %s" % message['Subject'])
            print(u"-- Body: --")
            print(message.get_payload(decode=True))

        return mhost.sendmail(from_addr, to_addrs, message.as_string())
    finally:
        # Don't leak the SMTP connection, whether or not sending worked.
        if not use_fake_host:
            try:
                mhost.quit()
            except smtplib.SMTPException:
                # Server already gone; just drop the socket.
                mhost.close()
20c834ff 311
8b28bee4
CAW
312
313###################
314# Translation tools
315###################
316
317
# Filesystem path of the 'i18n' resource inside the 'mediagoblin'
# package; setup_gettext() passes it to gettext.translation().
TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin', 'i18n')
b77eec65
CAW
320
321
8b28bee4
CAW
def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The part before the first '-' (or, failing that, the first '_') is
    lowercased and the rest is uppercased; a locale without either
    separator is simply lowercased.
    """
    # '-' takes precedence over '_' when both are present.
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()
334
335
def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    Everything is lowercased and the first '_' (if any) is turned
    into a '-'.
    """
    lang, separator, country = locale.partition('_')
    if separator:
        return '%s-%s' % (lang.lower(), country.lower())

    return locale.lower()
345
346
def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request

    Order of precedence:
     - a 'lang' parameter in the request's GET (or POST) data
     - a 'locale' entry in the routing matchdict
     - a 'target_lang' entry in the session
     - the first match from the request's accept-language data
     - 'en'

    Returns the chosen locale as formatted by locale_to_lower_upper().
    """
    request_form = request.GET or request.POST

    if 'lang' in request_form:
        return locale_to_lower_upper(request_form['lang'])

    accept_lang_matches = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    matchdict = request.matchdict or {}

    if 'locale' in matchdict:
        target_lang = matchdict['locale']
    elif 'target_lang' in request.session:
        target_lang = request.session['target_lang']
    # Pull the first acceptable language
    elif accept_lang_matches:
        target_lang = accept_lang_matches[0]
    # Fall back to English
    else:
        target_lang = 'en'

    return locale_to_lower_upper(target_lang)
b77eec65
CAW
374
375
a68ee555
CAW
# A super strict version of the lxml.html cleaner class
#
# Shared module-level instance; clean_html() below runs all HTML
# through it.
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    # The only tags that are allowed
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False, # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True, # for now
    # Empty whitelists: no host and no tag gets special treatment
    host_whitelist=(),
    whitelist_tags=set([]))
396
397
def clean_html(html):
    """
    Run html through HTML_CLEANER and return the result.

    Falsy input yields u'' without touching the cleaner, because the
    cleaner barfs on an empty string.
    """
    if html:
        return HTML_CLEANER.clean_html(html)

    return u''
404
405
def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags,

    Strips trailing and leading whitespace, collapses internal
    whitespace, and splits the "tags" text on the configured
    'tags_delimiter'.  Empty and duplicate tags are ignored.

    Returns a list of dicts, one per tag in order of first appearance,
    each with a 'name' and a 'slug' (see slugify) key.  A falsy
    tag_string gives an empty list.
    """
    taglist = []
    if not tag_string:
        return taglist

    # Strip out internal, trailing, and leading whitespace
    stripped_tag_string = u' '.join(tag_string.strip().split())

    # Names already added, so the duplicate check doesn't have to
    # rescan taglist for every tag.
    seen_names = set()

    # Split the tag string into a list of tags
    for raw_tag in stripped_tag_string.split(
            mg_globals.app_config['tags_delimiter']):
        name = raw_tag.strip()

        # Ignore empty or duplicate tags
        if name and name not in seen_names:
            seen_names.add(name)
            taglist.append({'name': name,
                            'slug': slugify(name)})

    return taglist
cdf538bd
CFD
429
430
0712a06d
CFD
def media_tags_as_string(media_entry_tags):
    """
    Build a single string out of a media item's tags (a list of dicts),
    joining the tag names with the configured 'tags_delimiter'.

    This is the opposite of convert_to_tag_list_of_dicts
    """
    if not media_entry_tags:
        return ''

    delimiter = mg_globals.app_config['tags_delimiter']
    return delimiter.join([tag['name'] for tag in media_entry_tags])
442
909371cd
CFD
443TOO_LONG_TAG_WARNING = \
444 u'Tags must be shorter than %s characters. Tags that are too long: %s'
445
def tag_length_validator(form, field):
    """
    Validator making sure no tag in field.data is longer than the
    configured 'tags_max_length'.

    Raises wtforms.ValidationError naming the offending tags.
    """
    max_length = mg_globals.app_config['tags_max_length']

    too_long_tags = []
    for tag in convert_to_tag_list_of_dicts(field.data):
        if len(tag['name']) > max_length:
            too_long_tags.append(tag['name'])

    if not too_long_tags:
        return

    raise wtforms.ValidationError(
        TOO_LONG_TAG_WARNING % (max_length, ', '.join(too_long_tags)))
4bf8e888
CAW
459
460
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')


def cleaned_markdown_conversion(text):
    """
    Convert a block of text with MarkDown, then clean the resulting
    HTML with clean_html().
    """
    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string :)
    if text:
        converted = MARKDOWN_INSTANCE.convert(text)
        return clean_html(converted)

    return u''
473
474
f99f61c6
CAW
# Cache of already-loaded translation objects, keyed by locale.
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    The translation object is taken from SETUP_GETTEXTS when present,
    otherwise loaded from TRANSLATIONS_PATH (with fallback enabled) and
    cached if babel reports that the locale exists.  Either way it is
    installed as mg_globals' translations.
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)
ae85ed0f
BK
497
498
03e5bd6d
CAW
499# Force en to be setup before anything else so that
500# mg_globals.translations is never None
501setup_gettext('en')
502
503
def pass_to_ugettext(*args, **kwargs):
    """
    Hand a translation over to the currently active ugettext method.

    We look the method up on every call rather than keeping a global
    ugettext, because mg_globals gets swapped out by the application
    per-request.
    """
    translations = mg_globals.translations
    return translations.ugettext(*args, **kwargs)
513
514
1c266dc3
CAW
def lazy_pass_to_ugettext(*args, **kwargs):
    """
    Lazy flavour of pass_to_ugettext.

    Returns a LazyProxy around pass_to_ugettext.  Handy for
    translations defined at module level that must not be translated
    until the moment they are used as a string.
    """
    lazy_translation = LazyProxy(pass_to_ugettext, *args, **kwargs)
    return lazy_translation
524
525
def pass_to_ngettext(*args, **kwargs):
    """
    Pass a translation on to the appropriate ngettext method.

    The reason we can't have a global ngettext method is because
    mg_globals gets swapped out by the application per-request.

    The unicode-returning ungettext is used, matching pass_to_ugettext
    and the callables installed into the jinja environment.
    """
    return mg_globals.translations.ungettext(
        *args, **kwargs)
535
536
def lazy_pass_to_ngettext(*args, **kwargs):
    """
    Lazy flavour of pass_to_ngettext.

    Returns a LazyProxy around pass_to_ngettext.  Handy for
    translations defined at module level that must not be translated
    until the moment they are used as a string.
    """
    lazy_translation = LazyProxy(pass_to_ngettext, *args, **kwargs)
    return lazy_translation
546
547
def fake_ugettext_passthrough(string):
    """
    Pretend to be ugettext, for extraction's sake ;)

    wtforms has its own way of defining a translation method, so here
    the text only needs to be marked up for extraction; it is handed
    back untouched rather than being run through gettext.
    """
    untranslated = string
    return untranslated
557
558
b9e9610b
CAW
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
             jump_to_id=False),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
                 jump_to_id=False):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - per_page: number of objects per page
         - cursor: db cursor
         - jump_to_id: ObjectId, sets the page to the page containing the
           object with _id == jump_to_id.
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()
        self.active_id = None

        if jump_to_id:
            # Walk a copy so that self.cursor itself is left alone
            # (assumes copying the cursor gives an independent
            # iterator -- TODO confirm for the db driver in use).
            cursor = copy.copy(self.cursor)

            for position, doc in enumerate(cursor):
                if doc['_id'] == jump_to_id:
                    # Zero-based position -> one-based page number
                    self.page = 1 + int(position // self.per_page)

                    self.active_id = jump_to_id
                    break

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages needed for total_count objects."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True if there is a page before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True if there is a page after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pager: the first left_edge
        pages, a window around the current page, and the last
        right_edge pages.  None is yielded wherever page numbers are
        skipped.
        """
        last = 0
        for num in xrange(1, self.pages + 1):
            if num <= left_edge or \
               (num > self.page - left_current - 1 and \
                num < self.page + right_current) or \
               num > self.pages - right_edge:
                if last + 1 != num:
                    # Gap between the previous number and this one
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url
        """
        # Work on a copy so the caller's parameters are not modified
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)
b5017dba
CAW
647
648
def gridify_list(this_list, num_cols=5):
    """
    Chop this_list into consecutive rows of at most num_cols items and
    return the rows as a list; only the last row can be shorter.
    """
    # Figure out how many rows we should have
    row_count = int(ceil(float(len(this_list)) / num_cols))

    return [
        this_list[index * num_cols:(index + 1) * num_cols]
        for index in range(row_count)]
668
669
def gridify_cursor(this_cursor, num_cols=5):
    """
    Like gridify_list, but for a cursor: the cursor is first read into
    a list, which is then split into rows of at most num_cols items.
    """
    items = list(this_cursor)
    return gridify_list(items, num_cols)
bae8f3d8
CAW
676
677
def render_404(request):
    """
    Render a 404.

    Returns the 'mediagoblin/404.html' template rendered into a
    response with HTTP status 404.
    """
    return render_to_response(
        request, 'mediagoblin/404.html', {}, status=404)
502073f2
JW
684
def delete_media_files(media):
    """
    Remove every file belonging to a MediaEntry from the public store.

    Arguments:
     - media: A MediaEntry document
    """
    # The keys of media_files are not needed, only the stored paths
    for listpath in media['media_files'].values():
        mg_globals.public_store.delete_file(listpath)

    for attachment in media['attachment_files']:
        mg_globals.public_store.delete_file(attachment['filepath'])