mediagoblin/util.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 from __future__ import division
  18
  19 from email.MIMEText import MIMEText
  20 import gettext
  21 import pkg_resources
  22 import smtplib
  23 import sys
  24 import re
  25 import urllib
  26 from math import ceil, floor
  27 import copy
  28 import wtforms
  29
  30 from babel.localedata import exists
  31 from babel.support import LazyProxy
  32 import jinja2
  33 import translitcodec
  34 from webob import Response, exc
  35 from lxml.html.clean import Cleaner
  36 import markdown
  37 from wtforms.form import Form
  38
  39 from mediagoblin import mg_globals
  40 from mediagoblin import messages
  41 from mediagoblin.db.util import ObjectId
  42
  43 from itertools import izip, count
  44
  45 DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']
  46
  47 TESTS_ENABLED = False
  48 def _activate_testing():
  49     """
  50     Call this to activate testing in util.py
  51     """
  52     global TESTS_ENABLED
  53     TESTS_ENABLED = True
  54
  55
  56 def clear_test_buckets():
  57     """
  58     We store some things for testing purposes that should be cleared
  59     when we want a "clean slate" of information for our next round of
  60     tests.  Call this function to wipe all that stuff clean.
  61
  62     Also wipes out some other things we might redefine during testing,
  63     like the jinja envs.
  64     """
  65     global SETUP_JINJA_ENVS
  66     SETUP_JINJA_ENVS = {}
  67
  68     global EMAIL_TEST_INBOX
  69     global EMAIL_TEST_MBOX_INBOX
  70     EMAIL_TEST_INBOX = []
  71     EMAIL_TEST_MBOX_INBOX = []
  72
  73     clear_test_template_context()
  74
  75
  76 SETUP_JINJA_ENVS = {}
  77
  78
  79 def get_jinja_env(template_loader, locale):
  80     """
  81     Set up the Jinja environment,
  82
  83     (In the future we may have another system for providing theming;
  84     for now this is good enough.)
  85     """
  86     setup_gettext(locale)
  87
  88     # If we have a jinja environment set up with this locale, just
  89     # return that one.
  90     if SETUP_JINJA_ENVS.has_key(locale):
  91         return SETUP_JINJA_ENVS[locale]
  92
  93     template_env = jinja2.Environment(
  94         loader=template_loader, autoescape=True,
  95         extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])
  96
  97     template_env.install_gettext_callables(
  98         mg_globals.translations.ugettext,
  99         mg_globals.translations.ungettext)
 100
 101     # All templates will know how to ...
 102     # ... fetch all waiting messages and remove them from the queue
 103     # ... construct a grid of thumbnails or other media
 104     template_env.globals['fetch_messages'] = messages.fetch_messages
 105     template_env.globals['gridify_list'] = gridify_list
 106     template_env.globals['gridify_cursor'] = gridify_cursor
 107
 108     if exists(locale):
 109         SETUP_JINJA_ENVS[locale] = template_env
 110
 111     return template_env
 112
 113
 114 # We'll store context information here when doing unit tests
 115 TEMPLATE_TEST_CONTEXT = {}
 116
 117
 118 def render_template(request, template_path, context):
 119     """
 120     Render a template with context.
 121
 122     Always inserts the request into the context, so you don't have to.
 123     Also stores the context if we're doing unit tests.  Helpful!
 124     """
 125     template = request.template_env.get_template(
 126         template_path)
 127     context['request'] = request
 128     rendered = template.render(context)
 129
 130     if TESTS_ENABLED:
 131         TEMPLATE_TEST_CONTEXT[template_path] = context
 132
 133     return rendered
 134
 135
 136 def clear_test_template_context():
 137     global TEMPLATE_TEST_CONTEXT
 138     TEMPLATE_TEST_CONTEXT = {}
 139
 140
 141 def render_to_response(request, template, context, status=200):
 142     """Much like Django's shortcut.render()"""
 143     return Response(
 144         render_template(request, template, context),
 145         status=status)
 146
 147
 148 def redirect(request, *args, **kwargs):
 149     """Returns a HTTPFound(), takes a request and then urlgen params"""
 150
 151     querystring = None
 152     if kwargs.get('querystring'):
 153         querystring = kwargs.get('querystring')
 154         del kwargs['querystring']
 155
 156     return exc.HTTPFound(
 157         location=''.join([
 158                 request.urlgen(*args, **kwargs),
 159                 querystring if querystring else '']))
 160
 161
 162 def setup_user_in_request(request):
 163     """
 164     Examine a request and tack on a request.user parameter if that's
 165     appropriate.
 166     """
 167     if not request.session.has_key('user_id'):
 168         request.user = None
 169         return
 170
 171     user = None
 172     user = request.app.db.User.one(
 173         {'_id': ObjectId(request.session['user_id'])})
 174
 175     if not user:
 176         # Something's wrong... this user doesn't exist?  Invalidate
 177         # this session.
 178         request.session.invalidate()
 179
 180     request.user = user
 181
 182
 183 def import_component(import_string):
 184     """
 185     Import a module component defined by STRING.  Probably a method,
 186     class, or global variable.
 187
 188     Args:
 189      - import_string: a string that defines what to import.  Written
 190        in the format of "module1.module2:component"
 191     """
 192     module_name, func_name = import_string.split(':', 1)
 193     __import__(module_name)
 194     module = sys.modules[module_name]
 195     func = getattr(module, func_name)
 196     return func
 197
 198 _punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
 199
 200 def slugify(text, delim=u'-'):
 201     """
 202     Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/
 203     """
 204     result = []
 205     for word in _punct_re.split(text.lower()):
 206         word = word.encode('translit/long')
 207         if word:
 208             result.append(word)
 209     return unicode(delim.join(result))
 210
 211 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 212 ### Special email test stuff begins HERE
 213 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 214
 215 # We have two "test inboxes" here:
 216 #
 217 # EMAIL_TEST_INBOX:
 218 # ----------------
 219 #   If you're writing test views, you'll probably want to check this.
 220 #   It contains a list of MIMEText messages.
 221 #
 222 # EMAIL_TEST_MBOX_INBOX:
 223 # ----------------------
 224 #   This collects the messages from the FakeMhost inbox.  It's really
 225 #   just here for testing the send_email method itself.
 226 #
 227 #   Anyway this contains:
 228 #    - from
 229 #    - to: a list of email recipient addresses
 230 #    - message: not just the body, but the whole message, including
 231 #      headers, etc.
 232 #
 233 # ***IMPORTANT!***
 234 # ----------------
 235 # Before running tests that call functions which send email, you should
 236 # always call _clear_test_inboxes() to "wipe" the inboxes clean.
 237
 238 EMAIL_TEST_INBOX = []
 239 EMAIL_TEST_MBOX_INBOX = []
 240
 241
 242 class FakeMhost(object):
 243     """
 244     Just a fake mail host so we can capture and test messages
 245     from send_email
 246     """
 247     def login(self, *args, **kwargs):
 248         pass
 249
 250     def sendmail(self, from_addr, to_addrs, message):
 251         EMAIL_TEST_MBOX_INBOX.append(
 252             {'from': from_addr,
 253              'to': to_addrs,
 254              'message': message})
 255
 256 def _clear_test_inboxes():
 257     global EMAIL_TEST_INBOX
 258     global EMAIL_TEST_MBOX_INBOX
 259     EMAIL_TEST_INBOX = []
 260     EMAIL_TEST_MBOX_INBOX = []
 261
 262 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 263 ### </Special email test stuff>
 264 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 265
 266 def send_email(from_addr, to_addrs, subject, message_body):
 267     """
 268     Simple email sending wrapper, use this so we can capture messages
 269     for unit testing purposes.
 270
 271     Args:
 272      - from_addr: address you're sending the email from
 273      - to_addrs: list of recipient email addresses
 274      - subject: subject of the email
 275      - message_body: email body text
 276     """
 277     if TESTS_ENABLED or mg_globals.app_config['email_debug_mode']:
 278         mhost = FakeMhost()
 279     elif not mg_globals.app_config['email_debug_mode']:
 280         mhost = smtplib.SMTP(
 281             mg_globals.app_config['email_smtp_host'],
 282             mg_globals.app_config['email_smtp_port'])
 283
 284         # SMTP.__init__ Issues SMTP.connect implicitly if host
 285         if not mg_globals.app_config['email_smtp_host']:  # e.g. host = ''
 286             mhost.connect()  # We SMTP.connect explicitly
 287
 288     if mg_globals.app_config['email_smtp_user'] \
 289             or mg_globals.app_config['email_smtp_pass']:
 290         mhost.login(
 291             mg_globals.app_config['email_smtp_user'],
 292             mg_globals.app_config['email_smtp_pass'])
 293
 294     message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
 295     message['Subject'] = subject
 296     message['From'] = from_addr
 297     message['To'] = ', '.join(to_addrs)
 298
 299     if TESTS_ENABLED:
 300         EMAIL_TEST_INBOX.append(message)
 301
 302     if mg_globals.app_config['email_debug_mode']:
 303         print u"===== Email ====="
 304         print u"From address: %s" % message['From']
 305         print u"To addresses: %s" % message['To']
 306         print u"Subject: %s" % message['Subject']
 307         print u"-- Body: --"
 308         print message.get_payload(decode=True)
 309
 310     return mhost.sendmail(from_addr, to_addrs, message.as_string())
 311
 312
 313 ###################
 314 # Translation tools
 315 ###################
 316
 317
 318 TRANSLATIONS_PATH = pkg_resources.resource_filename(
 319     'mediagoblin', 'i18n')
 320
 321
 322 def locale_to_lower_upper(locale):
 323     """
 324     Take a locale, regardless of style, and format it like "en-us"
 325     """
 326     if '-' in locale:
 327         lang, country = locale.split('-', 1)
 328         return '%s_%s' % (lang.lower(), country.upper())
 329     elif '_' in locale:
 330         lang, country = locale.split('_', 1)
 331         return '%s_%s' % (lang.lower(), country.upper())
 332     else:
 333         return locale.lower()
 334
 335
 336 def locale_to_lower_lower(locale):
 337     """
 338     Take a locale, regardless of style, and format it like "en_US"
 339     """
 340     if '_' in locale:
 341         lang, country = locale.split('_', 1)
 342         return '%s-%s' % (lang.lower(), country.lower())
 343     else:
 344         return locale.lower()
 345
 346
 347 def get_locale_from_request(request):
 348     """
 349     Figure out what target language is most appropriate based on the
 350     request
 351     """
 352     request_form = request.GET or request.POST
 353
 354     if request_form.has_key('lang'):
 355         return locale_to_lower_upper(request_form['lang'])
 356
 357     accept_lang_matches = request.accept_language.best_matches()
 358
 359     # Your routing can explicitly specify a target language
 360     matchdict = request.matchdict or {}
 361
 362     if matchdict.has_key('locale'):
 363         target_lang = matchdict['locale']
 364     elif request.session.has_key('target_lang'):
 365         target_lang = request.session['target_lang']
 366     # Pull the first acceptable language
 367     elif accept_lang_matches:
 368         target_lang = accept_lang_matches[0]
 369     # Fall back to English
 370     else:
 371         target_lang = 'en'
 372
 373     return locale_to_lower_upper(target_lang)
 374
 375
 376 # A super strict version of the lxml.html cleaner class
 377 HTML_CLEANER = Cleaner(
 378     scripts=True,
 379     javascript=True,
 380     comments=True,
 381     style=True,
 382     links=True,
 383     page_structure=True,
 384     processing_instructions=True,
 385     embedded=True,
 386     frames=True,
 387     forms=True,
 388     annoying_tags=True,
 389     allow_tags=[
 390         'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
 391     remove_unknown_tags=False, # can't be used with allow_tags
 392     safe_attrs_only=True,
 393     add_nofollow=True, # for now
 394     host_whitelist=(),
 395     whitelist_tags=set([]))
 396
 397
 398 def clean_html(html):
 399     # clean_html barfs on an empty string
 400     if not html:
 401         return u''
 402
 403     return HTML_CLEANER.clean_html(html)
 404
 405
 406 def convert_to_tag_list_of_dicts(tag_string):
 407     """
 408     Filter input from incoming string containing user tags,
 409
 410     Strips trailing, leading, and internal whitespace, and also converts
 411     the "tags" text into an array of tags
 412     """
 413     taglist = []
 414     if tag_string:
 415
 416         # Strip out internal, trailing, and leading whitespace
 417         stripped_tag_string = u' '.join(tag_string.strip().split())
 418
 419         # Split the tag string into a list of tags
 420         for tag in stripped_tag_string.split(
 421                                        mg_globals.app_config['tags_delimiter']):
 422
 423             # Ignore empty or duplicate tags
 424             if tag.strip() and tag.strip() not in [t['name'] for t in taglist]:
 425
 426                 taglist.append({'name': tag.strip(),
 427                                 'slug': slugify(tag.strip())})
 428     return taglist
 429
 430
 431 def media_tags_as_string(media_entry_tags):
 432     """
 433     Generate a string from a media item's tags, stored as a list of dicts
 434
 435     This is the opposite of convert_to_tag_list_of_dicts
 436     """
 437     media_tag_string = ''
 438     if media_entry_tags:
 439         media_tag_string = mg_globals.app_config['tags_delimiter'].join(
 440                                       [tag['name'] for tag in media_entry_tags])
 441     return media_tag_string
 442
 443 TOO_LONG_TAG_WARNING = \
 444     u'Tags must be shorter than %s characters.  Tags that are too long: %s'
 445
 446 def tag_length_validator(form, field):
 447     """
 448     Make sure tags do not exceed the maximum tag length.
 449     """
 450     tags = convert_to_tag_list_of_dicts(field.data)
 451     too_long_tags = [
 452         tag['name'] for tag in tags
 453         if len(tag['name']) > mg_globals.app_config['tags_max_length']]
 454
 455     if too_long_tags:
 456         raise wtforms.ValidationError(
 457             TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], \
 458                                     ', '.join(too_long_tags)))
 459
 460
 461 MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
 462
 463 def cleaned_markdown_conversion(text):
 464     """
 465     Take a block of text, run it through MarkDown, and clean its HTML.
 466     """
 467     # Markdown will do nothing with and clean_html can do nothing with
 468     # an empty string :)
 469     if not text:
 470         return u''
 471
 472     return clean_html(MARKDOWN_INSTANCE.convert(text))
 473
 474
 475 SETUP_GETTEXTS = {}
 476
 477 def setup_gettext(locale):
 478     """
 479     Setup the gettext instance based on this locale
 480     """
 481     # Later on when we have plugins we may want to enable the
 482     # multi-translations system they have so we can handle plugin
 483     # translations too
 484
 485     # TODO: fallback nicely on translations from pt_PT to pt if not
 486     # available, etc.
 487     if SETUP_GETTEXTS.has_key(locale):
 488         this_gettext = SETUP_GETTEXTS[locale]
 489     else:
 490         this_gettext = gettext.translation(
 491             'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
 492         if exists(locale):
 493             SETUP_GETTEXTS[locale] = this_gettext
 494
 495     mg_globals.setup_globals(
 496         translations=this_gettext)
 497
 498
 499 # Force en to be setup before anything else so that
 500 # mg_globals.translations is never None
 501 setup_gettext('en')
 502
 503
 504 def pass_to_ugettext(*args, **kwargs):
 505     """
 506     Pass a translation on to the appropriate ugettext method.
 507
 508     The reason we can't have a global ugettext method is because
 509     mg_globals gets swapped out by the application per-request.
 510     """
 511     return mg_globals.translations.ugettext(
 512         *args, **kwargs)
 513
 514
 515 def lazy_pass_to_ugettext(*args, **kwargs):
 516     """
 517     Lazily pass to ugettext.
 518
 519     This is useful if you have to define a translation on a module
 520     level but you need it to not translate until the time that it's
 521     used as a string.
 522     """
 523     return LazyProxy(pass_to_ugettext, *args, **kwargs)
 524
 525
 526 def pass_to_ngettext(*args, **kwargs):
 527     """
 528     Pass a translation on to the appropriate ngettext method.
 529
 530     The reason we can't have a global ngettext method is because
 531     mg_globals gets swapped out by the application per-request.
 532     """
 533     return mg_globals.translations.ngettext(
 534         *args, **kwargs)
 535
 536
 537 def lazy_pass_to_ngettext(*args, **kwargs):
 538     """
 539     Lazily pass to ngettext.
 540
 541     This is useful if you have to define a translation on a module
 542     level but you need it to not translate until the time that it's
 543     used as a string.
 544     """
 545     return LazyProxy(pass_to_ngettext, *args, **kwargs)
 546
 547
 548 def fake_ugettext_passthrough(string):
 549     """
 550     Fake a ugettext call for extraction's sake ;)
 551
 552     In wtforms there's a separate way to define a method to translate
 553     things... so we just need to mark up the text so that it can be
 554     extracted, not so that it's actually run through gettext.
 555     """
 556     return string
 557
 558
 559 PAGINATION_DEFAULT_PER_PAGE = 30
 560
 561 class Pagination(object):
 562     """
 563     Pagination class for mongodb queries.
 564
 565     Initialization through __init__(self, cursor, page=1, per_page=2),
 566     get actual data slice through __call__().
 567     """
 568
 569     def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
 570                  jump_to_id=False):
 571         """
 572         Initializes Pagination
 573
 574         Args:
 575          - page: requested page
 576          - per_page: number of objects per page
 577          - cursor: db cursor
 578          - jump_to_id: ObjectId, sets the page to the page containing the object
 579            with _id == jump_to_id.
 580         """
 581         self.page = page
 582         self.per_page = per_page
 583         self.cursor = cursor
 584         self.total_count = self.cursor.count()
 585         self.active_id = None
 586
 587         if jump_to_id:
 588             cursor = copy.copy(self.cursor)
 589
 590             for (doc, increment) in izip(cursor, count(0)):
 591                 if doc['_id'] == jump_to_id:
 592                     self.page = 1 + int(floor(increment / self.per_page))
 593
 594                     self.active_id = jump_to_id
 595                     break
 596
 597
 598     def __call__(self):
 599         """
 600         Returns slice of objects for the requested page
 601         """
 602         return self.cursor.skip(
 603             (self.page - 1) * self.per_page).limit(self.per_page)
 604
 605     @property
 606     def pages(self):
 607         return int(ceil(self.total_count / float(self.per_page)))
 608
 609     @property
 610     def has_prev(self):
 611         return self.page > 1
 612
 613     @property
 614     def has_next(self):
 615         return self.page < self.pages
 616
 617     def iter_pages(self, left_edge=2, left_current=2,
 618                    right_current=5, right_edge=2):
 619         last = 0
 620         for num in xrange(1, self.pages + 1):
 621             if num <= left_edge or \
 622                (num > self.page - left_current - 1 and \
 623                 num < self.page + right_current) or \
 624                num > self.pages - right_edge:
 625                 if last + 1 != num:
 626                     yield None
 627                 yield num
 628                 last = num
 629
 630     def get_page_url_explicit(self, base_url, get_params, page_no):
 631         """
 632         Get a page url by adding a page= parameter to the base url
 633         """
 634         new_get_params = copy.copy(get_params or {})
 635         new_get_params['page'] = page_no
 636         return "%s?%s" % (
 637             base_url, urllib.urlencode(new_get_params))
 638
 639     def get_page_url(self, request, page_no):
 640         """
 641         Get a new page url based of the request, and the new page number.
 642
 643         This is a nice wrapper around get_page_url_explicit()
 644         """
 645         return self.get_page_url_explicit(
 646             request.path_info, request.GET, page_no)
 647
 648
 649 def gridify_list(this_list, num_cols=5):
 650     """
 651     Generates a list of lists where each sub-list's length depends on
 652     the number of columns in the list
 653     """
 654     grid = []
 655
 656     # Figure out how many rows we should have
 657     num_rows = int(ceil(float(len(this_list)) / num_cols))
 658
 659     for row_num in range(num_rows):
 660         slice_min = row_num * num_cols
 661         slice_max = (row_num + 1) * num_cols
 662
 663         row = this_list[slice_min:slice_max]
 664
 665         grid.append(row)
 666
 667     return grid
 668
 669
 670 def gridify_cursor(this_cursor, num_cols=5):
 671     """
 672     Generates a list of lists where each sub-list's length depends on
 673     the number of columns in the list
 674     """
 675     return gridify_list(list(this_cursor), num_cols)
 676
 677
 678 def render_404(request):
 679     """
 680     Render a 404.
 681     """
 682     return render_to_response(
 683         request, 'mediagoblin/404.html', {}, status=400)
 684
 685 def delete_media_files(media):
 686     """
 687     Delete all files associated with a MediaEntry
 688
 689     Arguments:
 690      - media: A MediaEntry document
 691     """
 692     for handle, listpath in media['media_files'].items():
 693         mg_globals.public_store.delete_file(
 694             listpath)
 695
 696     for attachment in media['attachment_files']:
 697         mg_globals.public_store.delete_file(
 698             attachment['filepath'])