mediagoblin/util.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 from __future__ import division
  18
  19 from email.MIMEText import MIMEText
  20 import gettext
  21 import pkg_resources
  22 import smtplib
  23 import sys
  24 import re
  25 import urllib
  26 from math import ceil, floor
  27 import copy
  28 import wtforms
  29
  30 from babel.localedata import exists
  31 from babel.support import LazyProxy
  32 import jinja2
  33 import translitcodec
  34 from webob import Response, exc
  35 from lxml.html.clean import Cleaner
  36 import markdown
  37 from wtforms.form import Form
  38
  39 from mediagoblin import mg_globals
  40 from mediagoblin import messages
  41 from mediagoblin.db.util import ObjectId
  42
  43 from itertools import izip, count
  44
# Ordered list of image size names.
# NOTE(review): not referenced anywhere else in this file -- presumably
# the order in which callers look for a displayable image; confirm
# against usage.
DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']

# Test-mode switch, set to True by _activate_testing().  While it is
# set, render_template() records template contexts and send_email()
# uses the fake mail host and records the messages it builds.
TESTS_ENABLED = False
  48
  49
  50 def _activate_testing():
  51     """
  52     Call this to activate testing in util.py
  53     """
  54     global TESTS_ENABLED
  55     TESTS_ENABLED = True
  56
  57
  58 def clear_test_buckets():
  59     """
  60     We store some things for testing purposes that should be cleared
  61     when we want a "clean slate" of information for our next round of
  62     tests.  Call this function to wipe all that stuff clean.
  63
  64     Also wipes out some other things we might redefine during testing,
  65     like the jinja envs.
  66     """
  67     global SETUP_JINJA_ENVS
  68     SETUP_JINJA_ENVS = {}
  69
  70     global EMAIL_TEST_INBOX
  71     global EMAIL_TEST_MBOX_INBOX
  72     EMAIL_TEST_INBOX = []
  73     EMAIL_TEST_MBOX_INBOX = []
  74
  75     clear_test_template_context()
  76
  77
# Cache of jinja2 environments keyed by locale.  Filled by
# get_jinja_env() and reset by clear_test_buckets().
SETUP_JINJA_ENVS = {}
  79
  80
  81 def get_jinja_env(template_loader, locale):
  82     """
  83     Set up the Jinja environment,
  84
  85     (In the future we may have another system for providing theming;
  86     for now this is good enough.)
  87     """
  88     setup_gettext(locale)
  89
  90     # If we have a jinja environment set up with this locale, just
  91     # return that one.
  92     if locale in SETUP_JINJA_ENVS:
  93         return SETUP_JINJA_ENVS[locale]
  94
  95     template_env = jinja2.Environment(
  96         loader=template_loader, autoescape=True,
  97         extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])
  98
  99     template_env.install_gettext_callables(
 100         mg_globals.translations.ugettext,
 101         mg_globals.translations.ungettext)
 102
 103     # All templates will know how to ...
 104     # ... fetch all waiting messages and remove them from the queue
 105     # ... construct a grid of thumbnails or other media
 106     template_env.globals['fetch_messages'] = messages.fetch_messages
 107     template_env.globals['gridify_list'] = gridify_list
 108     template_env.globals['gridify_cursor'] = gridify_cursor
 109
 110     if exists(locale):
 111         SETUP_JINJA_ENVS[locale] = template_env
 112
 113     return template_env
 114
 115
# We'll store context information here when doing unit tests:
# render_template() files each context under its template path, and
# clear_test_template_context() rebinds this to an empty dict.
TEMPLATE_TEST_CONTEXT = {}
 118
 119
 120 def render_template(request, template_path, context):
 121     """
 122     Render a template with context.
 123
 124     Always inserts the request into the context, so you don't have to.
 125     Also stores the context if we're doing unit tests.  Helpful!
 126     """
 127     template = request.template_env.get_template(
 128         template_path)
 129     context['request'] = request
 130     rendered = template.render(context)
 131
 132     if TESTS_ENABLED:
 133         TEMPLATE_TEST_CONTEXT[template_path] = context
 134
 135     return rendered
 136
 137
 138 def clear_test_template_context():
 139     global TEMPLATE_TEST_CONTEXT
 140     TEMPLATE_TEST_CONTEXT = {}
 141
 142
 143 def render_to_response(request, template, context, status=200):
 144     """Much like Django's shortcut.render()"""
 145     return Response(
 146         render_template(request, template, context),
 147         status=status)
 148
 149
 150 def redirect(request, *args, **kwargs):
 151     """Returns a HTTPFound(), takes a request and then urlgen params"""
 152
 153     querystring = None
 154     if kwargs.get('querystring'):
 155         querystring = kwargs.get('querystring')
 156         del kwargs['querystring']
 157
 158     return exc.HTTPFound(
 159         location=''.join([
 160                 request.urlgen(*args, **kwargs),
 161                 querystring if querystring else '']))
 162
 163
 164 def setup_user_in_request(request):
 165     """
 166     Examine a request and tack on a request.user parameter if that's
 167     appropriate.
 168     """
 169     if not 'user_id' in request.session:
 170         request.user = None
 171         return
 172
 173     user = None
 174     user = request.app.db.User.one(
 175         {'_id': ObjectId(request.session['user_id'])})
 176
 177     if not user:
 178         # Something's wrong... this user doesn't exist?  Invalidate
 179         # this session.
 180         request.session.invalidate()
 181
 182     request.user = user
 183
 184
 185 def import_component(import_string):
 186     """
 187     Import a module component defined by STRING.  Probably a method,
 188     class, or global variable.
 189
 190     Args:
 191      - import_string: a string that defines what to import.  Written
 192        in the format of "module1.module2:component"
 193     """
 194     module_name, func_name = import_string.split(':', 1)
 195     __import__(module_name)
 196     module = sys.modules[module_name]
 197     func = getattr(module, func_name)
 198     return func
 199
# Matches one or more consecutive separator characters (tab, space and
# the listed punctuation).  slugify() splits its input on these runs.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
 201
 202
 203 def slugify(text, delim=u'-'):
 204     """
 205     Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/
 206     """
 207     result = []
 208     for word in _punct_re.split(text.lower()):
 209         word = word.encode('translit/long')
 210         if word:
 211             result.append(word)
 212     return unicode(delim.join(result))
 213
 214 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 215 ### Special email test stuff begins HERE
 216 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 217
 218 # We have two "test inboxes" here:
 219 #
 220 # EMAIL_TEST_INBOX:
 221 # ----------------
 222 #   If you're writing test views, you'll probably want to check this.
 223 #   It contains a list of MIMEText messages.
 224 #
 225 # EMAIL_TEST_MBOX_INBOX:
 226 # ----------------------
  227 #   This collects the messages from the FakeMhost inbox.  It's really
 228 #   just here for testing the send_email method itself.
 229 #
 230 #   Anyway this contains:
 231 #    - from
 232 #    - to: a list of email recipient addresses
 233 #    - message: not just the body, but the whole message, including
 234 #      headers, etc.
 235 #
 236 # ***IMPORTANT!***
 237 # ----------------
 238 # Before running tests that call functions which send email, you should
 239 # always call _clear_test_inboxes() to "wipe" the inboxes clean.
 240
# Messages built by send_email() while tests are enabled.
EMAIL_TEST_INBOX = []
# Dicts with 'from', 'to' and 'message' keys, appended by
# FakeMhost.sendmail().
EMAIL_TEST_MBOX_INBOX = []
 243
 244
 245 class FakeMhost(object):
 246     """
 247     Just a fake mail host so we can capture and test messages
 248     from send_email
 249     """
 250     def login(self, *args, **kwargs):
 251         pass
 252
 253     def sendmail(self, from_addr, to_addrs, message):
 254         EMAIL_TEST_MBOX_INBOX.append(
 255             {'from': from_addr,
 256              'to': to_addrs,
 257              'message': message})
 258
 259
 260 def _clear_test_inboxes():
 261     global EMAIL_TEST_INBOX
 262     global EMAIL_TEST_MBOX_INBOX
 263     EMAIL_TEST_INBOX = []
 264     EMAIL_TEST_MBOX_INBOX = []
 265
 266 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 267 ### </Special email test stuff>
 268 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 269
 270
 271 def send_email(from_addr, to_addrs, subject, message_body):
 272     """
 273     Simple email sending wrapper, use this so we can capture messages
 274     for unit testing purposes.
 275
 276     Args:
 277      - from_addr: address you're sending the email from
 278      - to_addrs: list of recipient email addresses
 279      - subject: subject of the email
 280      - message_body: email body text
 281     """
 282     if TESTS_ENABLED or mg_globals.app_config['email_debug_mode']:
 283         mhost = FakeMhost()
 284     elif not mg_globals.app_config['email_debug_mode']:
 285         mhost = smtplib.SMTP(
 286             mg_globals.app_config['email_smtp_host'],
 287             mg_globals.app_config['email_smtp_port'])
 288
 289         # SMTP.__init__ Issues SMTP.connect implicitly if host
 290         if not mg_globals.app_config['email_smtp_host']:  # e.g. host = ''
 291             mhost.connect()  # We SMTP.connect explicitly
 292
 293     if mg_globals.app_config['email_smtp_user'] \
 294             or mg_globals.app_config['email_smtp_pass']:
 295         mhost.login(
 296             mg_globals.app_config['email_smtp_user'],
 297             mg_globals.app_config['email_smtp_pass'])
 298
 299     message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
 300     message['Subject'] = subject
 301     message['From'] = from_addr
 302     message['To'] = ', '.join(to_addrs)
 303
 304     if TESTS_ENABLED:
 305         EMAIL_TEST_INBOX.append(message)
 306
 307     if mg_globals.app_config['email_debug_mode']:
 308         print u"===== Email ====="
 309         print u"From address: %s" % message['From']
 310         print u"To addresses: %s" % message['To']
 311         print u"Subject: %s" % message['Subject']
 312         print u"-- Body: --"
 313         print message.get_payload(decode=True)
 314
 315     return mhost.sendmail(from_addr, to_addrs, message.as_string())
 316
 317
 318 ###################
 319 # Translation tools
 320 ###################
 321
 322
# Filesystem path of the 'i18n' resource inside the 'mediagoblin'
# package; setup_gettext() passes it to gettext.translation() as the
# directory to search for translation catalogs.
TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin', 'i18n')
 325
 326
 327 def locale_to_lower_upper(locale):
 328     """
 329     Take a locale, regardless of style, and format it like "en-us"
 330     """
 331     if '-' in locale:
 332         lang, country = locale.split('-', 1)
 333         return '%s_%s' % (lang.lower(), country.upper())
 334     elif '_' in locale:
 335         lang, country = locale.split('_', 1)
 336         return '%s_%s' % (lang.lower(), country.upper())
 337     else:
 338         return locale.lower()
 339
 340
 341 def locale_to_lower_lower(locale):
 342     """
 343     Take a locale, regardless of style, and format it like "en_US"
 344     """
 345     if '_' in locale:
 346         lang, country = locale.split('_', 1)
 347         return '%s-%s' % (lang.lower(), country.lower())
 348     else:
 349         return locale.lower()
 350
 351
 352 def get_locale_from_request(request):
 353     """
 354     Figure out what target language is most appropriate based on the
 355     request
 356     """
 357     request_form = request.GET or request.POST
 358
 359     if 'lang' in request_form:
 360         return locale_to_lower_upper(request_form['lang'])
 361
 362     accept_lang_matches = request.accept_language.best_matches()
 363
 364     # Your routing can explicitly specify a target language
 365     matchdict = request.matchdict or {}
 366
 367     if 'locale' in matchdict:
 368         target_lang = matchdict['locale']
 369     elif 'target_lang' in request.session:
 370         target_lang = request.session['target_lang']
 371     # Pull the first acceptable language
 372     elif accept_lang_matches:
 373         target_lang = accept_lang_matches[0]
 374     # Fall back to English
 375     else:
 376         target_lang = 'en'
 377
 378     return locale_to_lower_upper(target_lang)
 379
 380
# A super strict version of the lxml.html cleaner class.
# Shared instance used by clean_html().  allow_tags below is the list of
# tags explicitly allowed through.
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True,  # for now
    host_whitelist=(),
    whitelist_tags=set([]))
 401
 402
 403 def clean_html(html):
 404     # clean_html barfs on an empty string
 405     if not html:
 406         return u''
 407
 408     return HTML_CLEANER.clean_html(html)
 409
 410
 411 def convert_to_tag_list_of_dicts(tag_string):
 412     """
 413     Filter input from incoming string containing user tags,
 414
 415     Strips trailing, leading, and internal whitespace, and also converts
 416     the "tags" text into an array of tags
 417     """
 418     taglist = []
 419     if tag_string:
 420
 421         # Strip out internal, trailing, and leading whitespace
 422         stripped_tag_string = u' '.join(tag_string.strip().split())
 423
 424         # Split the tag string into a list of tags
 425         for tag in stripped_tag_string.split(
 426                 mg_globals.app_config['tags_delimiter']):
 427
 428             # Ignore empty or duplicate tags
 429             if tag.strip() and tag.strip() not in [t['name'] for t in taglist]:
 430
 431                 taglist.append({'name': tag.strip(),
 432                                 'slug': slugify(tag.strip())})
 433     return taglist
 434
 435
 436 def media_tags_as_string(media_entry_tags):
 437     """
 438     Generate a string from a media item's tags, stored as a list of dicts
 439
 440     This is the opposite of convert_to_tag_list_of_dicts
 441     """
 442     media_tag_string = ''
 443     if media_entry_tags:
 444         media_tag_string = mg_globals.app_config['tags_delimiter'].join(
 445             [tag['name'] for tag in media_entry_tags])
 446     return media_tag_string
 447
# Error message template used by tag_length_validator().  The two %s
# slots take the maximum tag length and the comma-separated list of
# offending tags, in that order.
TOO_LONG_TAG_WARNING = \
    u'Tags must be shorter than %s characters.  Tags that are too long: %s'
 450
 451
 452 def tag_length_validator(form, field):
 453     """
 454     Make sure tags do not exceed the maximum tag length.
 455     """
 456     tags = convert_to_tag_list_of_dicts(field.data)
 457     too_long_tags = [
 458         tag['name'] for tag in tags
 459         if len(tag['name']) > mg_globals.app_config['tags_max_length']]
 460
 461     if too_long_tags:
 462         raise wtforms.ValidationError(
 463             TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], \
 464                                     ', '.join(too_long_tags)))
 465
 466
# Shared Markdown converter used by cleaned_markdown_conversion(),
# created with safe_mode='escape'.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
 468
 469
 470 def cleaned_markdown_conversion(text):
 471     """
 472     Take a block of text, run it through MarkDown, and clean its HTML.
 473     """
 474     # Markdown will do nothing with and clean_html can do nothing with
 475     # an empty string :)
 476     if not text:
 477         return u''
 478
 479     return clean_html(MARKDOWN_INSTANCE.convert(text))
 480
 481
# Cache of gettext translation objects keyed by locale, filled by
# setup_gettext().
SETUP_GETTEXTS = {}
 483
 484
 485 def setup_gettext(locale):
 486     """
 487     Setup the gettext instance based on this locale
 488     """
 489     # Later on when we have plugins we may want to enable the
 490     # multi-translations system they have so we can handle plugin
 491     # translations too
 492
 493     # TODO: fallback nicely on translations from pt_PT to pt if not
 494     # available, etc.
 495     if locale in SETUP_GETTEXTS:
 496         this_gettext = SETUP_GETTEXTS[locale]
 497     else:
 498         this_gettext = gettext.translation(
 499             'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
 500         if exists(locale):
 501             SETUP_GETTEXTS[locale] = this_gettext
 502
 503     mg_globals.setup_globals(
 504         translations=this_gettext)
 505
 506
# Force en to be setup before anything else so that
# mg_globals.translations is never None.  This runs at import time, so
# it has to stay below the definitions of setup_gettext() and
# SETUP_GETTEXTS.
setup_gettext('en')
 510
 511
 512 def pass_to_ugettext(*args, **kwargs):
 513     """
 514     Pass a translation on to the appropriate ugettext method.
 515
 516     The reason we can't have a global ugettext method is because
 517     mg_globals gets swapped out by the application per-request.
 518     """
 519     return mg_globals.translations.ugettext(
 520         *args, **kwargs)
 521
 522
 523 def lazy_pass_to_ugettext(*args, **kwargs):
 524     """
 525     Lazily pass to ugettext.
 526
 527     This is useful if you have to define a translation on a module
 528     level but you need it to not translate until the time that it's
 529     used as a string.
 530     """
 531     return LazyProxy(pass_to_ugettext, *args, **kwargs)
 532
 533
 534 def pass_to_ngettext(*args, **kwargs):
 535     """
 536     Pass a translation on to the appropriate ngettext method.
 537
 538     The reason we can't have a global ngettext method is because
 539     mg_globals gets swapped out by the application per-request.
 540     """
 541     return mg_globals.translations.ngettext(
 542         *args, **kwargs)
 543
 544
 545 def lazy_pass_to_ngettext(*args, **kwargs):
 546     """
 547     Lazily pass to ngettext.
 548
 549     This is useful if you have to define a translation on a module
 550     level but you need it to not translate until the time that it's
 551     used as a string.
 552     """
 553     return LazyProxy(pass_to_ngettext, *args, **kwargs)
 554
 555
 556 def fake_ugettext_passthrough(string):
 557     """
 558     Fake a ugettext call for extraction's sake ;)
 559
 560     In wtforms there's a separate way to define a method to translate
 561     things... so we just need to mark up the text so that it can be
 562     extracted, not so that it's actually run through gettext.
 563     """
 564     return string
 565
 566
 567 PAGINATION_DEFAULT_PER_PAGE = 30
 568
 569
 570 class Pagination(object):
 571     """
 572     Pagination class for mongodb queries.
 573
 574     Initialization through __init__(self, cursor, page=1, per_page=2),
 575     get actual data slice through __call__().
 576     """
 577
 578     def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
 579                  jump_to_id=False):
 580         """
 581         Initializes Pagination
 582
 583         Args:
 584          - page: requested page
 585          - per_page: number of objects per page
 586          - cursor: db cursor
 587          - jump_to_id: ObjectId, sets the page to the page containing the
 588            object with _id == jump_to_id.
 589         """
 590         self.page = page
 591         self.per_page = per_page
 592         self.cursor = cursor
 593         self.total_count = self.cursor.count()
 594         self.active_id = None
 595
 596         if jump_to_id:
 597             cursor = copy.copy(self.cursor)
 598
 599             for (doc, increment) in izip(cursor, count(0)):
 600                 if doc['_id'] == jump_to_id:
 601                     self.page = 1 + int(floor(increment / self.per_page))
 602
 603                     self.active_id = jump_to_id
 604                     break
 605
 606     def __call__(self):
 607         """
 608         Returns slice of objects for the requested page
 609         """
 610         return self.cursor.skip(
 611             (self.page - 1) * self.per_page).limit(self.per_page)
 612
 613     @property
 614     def pages(self):
 615         return int(ceil(self.total_count / float(self.per_page)))
 616
 617     @property
 618     def has_prev(self):
 619         return self.page > 1
 620
 621     @property
 622     def has_next(self):
 623         return self.page < self.pages
 624
 625     def iter_pages(self, left_edge=2, left_current=2,
 626                    right_current=5, right_edge=2):
 627         last = 0
 628         for num in xrange(1, self.pages + 1):
 629             if num <= left_edge or \
 630                (num > self.page - left_current - 1 and \
 631                 num < self.page + right_current) or \
 632                num > self.pages - right_edge:
 633                 if last + 1 != num:
 634                     yield None
 635                 yield num
 636                 last = num
 637
 638     def get_page_url_explicit(self, base_url, get_params, page_no):
 639         """Get a page url by adding a page= parameter to the base url
 640         """
 641         new_get_params = copy.copy(get_params or {})
 642         new_get_params['page'] = page_no
 643         return "%s?%s" % (
 644             base_url, urllib.urlencode(new_get_params))
 645
 646     def get_page_url(self, request, page_no):
 647         """Get a new page url based of the request, and the new page number.
 648
 649         This is a nice wrapper around get_page_url_explicit()
 650         """
 651         return self.get_page_url_explicit(
 652             request.path_info, request.GET, page_no)
 653
 654
 655 def gridify_list(this_list, num_cols=5):
 656     """
 657     Generates a list of lists where each sub-list's length depends on
 658     the number of columns in the list
 659     """
 660     grid = []
 661
 662     # Figure out how many rows we should have
 663     num_rows = int(ceil(float(len(this_list)) / num_cols))
 664
 665     for row_num in range(num_rows):
 666         slice_min = row_num * num_cols
 667         slice_max = (row_num + 1) * num_cols
 668
 669         row = this_list[slice_min:slice_max]
 670
 671         grid.append(row)
 672
 673     return grid
 674
 675
 676 def gridify_cursor(this_cursor, num_cols=5):
 677     """
 678     Generates a list of lists where each sub-list's length depends on
 679     the number of columns in the list
 680     """
 681     return gridify_list(list(this_cursor), num_cols)
 682
 683
 684 def render_404(request):
 685     """
 686     Render a 404.
 687     """
 688     return render_to_response(
 689         request, 'mediagoblin/404.html', {}, status=400)
 690
 691
 692 def delete_media_files(media):
 693     """
 694     Delete all files associated with a MediaEntry
 695
 696     Arguments:
 697      - media: A MediaEntry document
 698     """
 699     for listpath in media['media_files'].itervalues():
 700         mg_globals.public_store.delete_file(
 701             listpath)
 702
 703     for attachment in media['attachment_files']:
 704         mg_globals.public_store.delete_file(
 705             attachment['filepath'])