mediagoblin/util.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011 Free Software Foundation, Inc
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 from __future__ import division
  18
  19 from email.MIMEText import MIMEText
  20 import gettext
  21 import pkg_resources
  22 import smtplib
  23 import sys
  24 import re
  25 import urllib
  26 from math import ceil, floor
  27 import copy
  28 import wtforms
  29
  30 from babel.localedata import exists
  31 from babel.support import LazyProxy
  32 import jinja2
  33 import translitcodec
  34 from webob import Response, exc
  35 from lxml.html.clean import Cleaner
  36 import markdown
  37 from wtforms.form import Form
  38
  39 from mediagoblin import mg_globals
  40 from mediagoblin import messages
  41 from mediagoblin.db.util import ObjectId
  42
  43 from itertools import izip, count
  44
  45 DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']
  46
  47 TESTS_ENABLED = False
  48 def _activate_testing():
  49     """
  50     Call this to activate testing in util.py
  51     """
  52     global TESTS_ENABLED
  53     TESTS_ENABLED = True
  54
  55
  56 def clear_test_buckets():
  57     """
  58     We store some things for testing purposes that should be cleared
  59     when we want a "clean slate" of information for our next round of
  60     tests.  Call this function to wipe all that stuff clean.
  61
  62     Also wipes out some other things we might redefine during testing,
  63     like the jinja envs.
  64     """
  65     global SETUP_JINJA_ENVS
  66     SETUP_JINJA_ENVS = {}
  67
  68     global EMAIL_TEST_INBOX
  69     global EMAIL_TEST_MBOX_INBOX
  70     EMAIL_TEST_INBOX = []
  71     EMAIL_TEST_MBOX_INBOX = []
  72
  73     clear_test_template_context()
  74
  75
  76 SETUP_JINJA_ENVS = {}
  77
  78
  79 def get_jinja_env(template_loader, locale):
  80     """
  81     Set up the Jinja environment,
  82
  83     (In the future we may have another system for providing theming;
  84     for now this is good enough.)
  85     """
  86     setup_gettext(locale)
  87
  88     # If we have a jinja environment set up with this locale, just
  89     # return that one.
  90     if SETUP_JINJA_ENVS.has_key(locale):
  91         return SETUP_JINJA_ENVS[locale]
  92
  93     template_env = jinja2.Environment(
  94         loader=template_loader, autoescape=True,
  95         extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])
  96
  97     template_env.install_gettext_callables(
  98         mg_globals.translations.ugettext,
  99         mg_globals.translations.ungettext)
 100
 101     # All templates will know how to ...
 102     # ... fetch all waiting messages and remove them from the queue
 103     # ... construct a grid of thumbnails or other media
 104     template_env.globals['fetch_messages'] = messages.fetch_messages
 105     template_env.globals['gridify_list'] = gridify_list
 106     template_env.globals['gridify_cursor'] = gridify_cursor
 107
 108     if exists(locale):
 109         SETUP_JINJA_ENVS[locale] = template_env
 110
 111     return template_env
 112
 113
 114 # We'll store context information here when doing unit tests
 115 TEMPLATE_TEST_CONTEXT = {}
 116
 117
 118 def render_template(request, template_path, context):
 119     """
 120     Render a template with context.
 121
 122     Always inserts the request into the context, so you don't have to.
 123     Also stores the context if we're doing unit tests.  Helpful!
 124     """
 125     template = request.template_env.get_template(
 126         template_path)
 127     context['request'] = request
 128     rendered = template.render(context)
 129
 130     if TESTS_ENABLED:
 131         TEMPLATE_TEST_CONTEXT[template_path] = context
 132
 133     return rendered
 134
 135
 136 def clear_test_template_context():
 137     global TEMPLATE_TEST_CONTEXT
 138     TEMPLATE_TEST_CONTEXT = {}
 139
 140
 141 def render_to_response(request, template, context, status=200):
 142     """Much like Django's shortcut.render()"""
 143     return Response(
 144         render_template(request, template, context),
 145         status=status)
 146
 147
 148 def redirect(request, *args, **kwargs):
 149     """Returns a HTTPFound(), takes a request and then urlgen params"""
 150
 151     querystring = None
 152     if kwargs.get('querystring'):
 153         querystring = kwargs.get('querystring')
 154         del kwargs['querystring']
 155
 156     return exc.HTTPFound(
 157         location=''.join([
 158                 request.urlgen(*args, **kwargs),
 159                 querystring if querystring else '']))
 160
 161
 162 def setup_user_in_request(request):
 163     """
 164     Examine a request and tack on a request.user parameter if that's
 165     appropriate.
 166     """
 167     if not request.session.has_key('user_id'):
 168         request.user = None
 169         return
 170
 171     user = None
 172     user = request.app.db.User.one(
 173         {'_id': ObjectId(request.session['user_id'])})
 174
 175     if not user:
 176         # Something's wrong... this user doesn't exist?  Invalidate
 177         # this session.
 178         request.session.invalidate()
 179
 180     request.user = user
 181
 182
 183 def import_component(import_string):
 184     """
 185     Import a module component defined by STRING.  Probably a method,
 186     class, or global variable.
 187
 188     Args:
 189      - import_string: a string that defines what to import.  Written
 190        in the format of "module1.module2:component"
 191     """
 192     module_name, func_name = import_string.split(':', 1)
 193     __import__(module_name)
 194     module = sys.modules[module_name]
 195     func = getattr(module, func_name)
 196     return func
 197
 198 _punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
 199
 200 def slugify(text, delim=u'-'):
 201     """
 202     Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/
 203     """
 204     result = []
 205     for word in _punct_re.split(text.lower()):
 206         word = word.encode('translit/long')
 207         if word:
 208             result.append(word)
 209     return unicode(delim.join(result))
 210
 211 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 212 ### Special email test stuff begins HERE
 213 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 214
 215 # We have two "test inboxes" here:
 216 #
 217 # EMAIL_TEST_INBOX:
 218 # ----------------
 219 #   If you're writing test views, you'll probably want to check this.
 220 #   It contains a list of MIMEText messages.
 221 #
 222 # EMAIL_TEST_MBOX_INBOX:
 223 # ----------------------
  224 #   This collects the messages from the FakeMhost inbox.  It's really
 225 #   just here for testing the send_email method itself.
 226 #
 227 #   Anyway this contains:
 228 #    - from
 229 #    - to: a list of email recipient addresses
 230 #    - message: not just the body, but the whole message, including
 231 #      headers, etc.
 232 #
 233 # ***IMPORTANT!***
 234 # ----------------
 235 # Before running tests that call functions which send email, you should
 236 # always call _clear_test_inboxes() to "wipe" the inboxes clean.
 237
 238 EMAIL_TEST_INBOX = []
 239 EMAIL_TEST_MBOX_INBOX = []
 240
 241
 242 class FakeMhost(object):
 243     """
 244     Just a fake mail host so we can capture and test messages
 245     from send_email
 246     """
 247     def connect(self):
 248         pass
 249
 250     def sendmail(self, from_addr, to_addrs, message):
 251         EMAIL_TEST_MBOX_INBOX.append(
 252             {'from': from_addr,
 253              'to': to_addrs,
 254              'message': message})
 255
 256 def _clear_test_inboxes():
 257     global EMAIL_TEST_INBOX
 258     global EMAIL_TEST_MBOX_INBOX
 259     EMAIL_TEST_INBOX = []
 260     EMAIL_TEST_MBOX_INBOX = []
 261
 262 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 263 ### </Special email test stuff>
 264 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 265
 266 def send_email(from_addr, to_addrs, subject, message_body):
 267     """
 268     Simple email sending wrapper, use this so we can capture messages
 269     for unit testing purposes.
 270
 271     Args:
 272      - from_addr: address you're sending the email from
 273      - to_addrs: list of recipient email addresses
 274      - subject: subject of the email
 275      - message_body: email body text
 276     """
 277     # TODO: make a mock mhost if testing is enabled
 278     if TESTS_ENABLED or mg_globals.app_config['email_debug_mode']:
 279         mhost = FakeMhost()
 280     elif not mg_globals.app_config['email_debug_mode']:
 281         mhost = smtplib.SMTP()
 282
 283     mhost.connect()
 284
 285     message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
 286     message['Subject'] = subject
 287     message['From'] = from_addr
 288     message['To'] = ', '.join(to_addrs)
 289
 290     if TESTS_ENABLED:
 291         EMAIL_TEST_INBOX.append(message)
 292
 293     if mg_globals.app_config['email_debug_mode']:
 294         print u"===== Email ====="
 295         print u"From address: %s" % message['From']
 296         print u"To addresses: %s" % message['To']
 297         print u"Subject: %s" % message['Subject']
 298         print u"-- Body: --"
 299         print message.get_payload(decode=True)
 300
 301     return mhost.sendmail(from_addr, to_addrs, message.as_string())
 302
 303
 304 ###################
 305 # Translation tools
 306 ###################
 307
 308
 309 TRANSLATIONS_PATH = pkg_resources.resource_filename(
 310     'mediagoblin', 'i18n')
 311
 312
 313 def locale_to_lower_upper(locale):
 314     """
 315     Take a locale, regardless of style, and format it like "en-us"
 316     """
 317     if '-' in locale:
 318         lang, country = locale.split('-', 1)
 319         return '%s_%s' % (lang.lower(), country.upper())
 320     elif '_' in locale:
 321         lang, country = locale.split('_', 1)
 322         return '%s_%s' % (lang.lower(), country.upper())
 323     else:
 324         return locale.lower()
 325
 326
 327 def locale_to_lower_lower(locale):
 328     """
 329     Take a locale, regardless of style, and format it like "en_US"
 330     """
 331     if '_' in locale:
 332         lang, country = locale.split('_', 1)
 333         return '%s-%s' % (lang.lower(), country.lower())
 334     else:
 335         return locale.lower()
 336
 337
 338 def get_locale_from_request(request):
 339     """
 340     Figure out what target language is most appropriate based on the
 341     request
 342     """
 343     request_form = request.GET or request.POST
 344
 345     if request_form.has_key('lang'):
 346         return locale_to_lower_upper(request_form['lang'])
 347
 348     accept_lang_matches = request.accept_language.best_matches()
 349
 350     # Your routing can explicitly specify a target language
 351     matchdict = request.matchdict or {}
 352
 353     if matchdict.has_key('locale'):
 354         target_lang = matchdict['locale']
 355     elif request.session.has_key('target_lang'):
 356         target_lang = request.session['target_lang']
 357     # Pull the first acceptable language
 358     elif accept_lang_matches:
 359         target_lang = accept_lang_matches[0]
 360     # Fall back to English
 361     else:
 362         target_lang = 'en'
 363
 364     return locale_to_lower_upper(target_lang)
 365
 366
 367 # A super strict version of the lxml.html cleaner class
 368 HTML_CLEANER = Cleaner(
 369     scripts=True,
 370     javascript=True,
 371     comments=True,
 372     style=True,
 373     links=True,
 374     page_structure=True,
 375     processing_instructions=True,
 376     embedded=True,
 377     frames=True,
 378     forms=True,
 379     annoying_tags=True,
 380     allow_tags=[
 381         'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
 382     remove_unknown_tags=False, # can't be used with allow_tags
 383     safe_attrs_only=True,
 384     add_nofollow=True, # for now
 385     host_whitelist=(),
 386     whitelist_tags=set([]))
 387
 388
 389 def clean_html(html):
 390     # clean_html barfs on an empty string
 391     if not html:
 392         return u''
 393
 394     return HTML_CLEANER.clean_html(html)
 395
 396
 397 def convert_to_tag_list_of_dicts(tag_string):
 398     """
 399     Filter input from incoming string containing user tags,
 400
 401     Strips trailing, leading, and internal whitespace, and also converts
 402     the "tags" text into an array of tags
 403     """
 404     taglist = []
 405     if tag_string:
 406
 407         # Strip out internal, trailing, and leading whitespace
 408         stripped_tag_string = u' '.join(tag_string.strip().split())
 409
 410         # Split the tag string into a list of tags
 411         for tag in stripped_tag_string.split(
 412                                        mg_globals.app_config['tags_delimiter']):
 413
 414             # Ignore empty or duplicate tags
 415             if tag.strip() and tag.strip() not in [t['name'] for t in taglist]:
 416
 417                 taglist.append({'name': tag.strip(),
 418                                 'slug': slugify(tag.strip())})
 419     return taglist
 420
 421
 422 def media_tags_as_string(media_entry_tags):
 423     """
 424     Generate a string from a media item's tags, stored as a list of dicts
 425
 426     This is the opposite of convert_to_tag_list_of_dicts
 427     """
 428     media_tag_string = ''
 429     if media_entry_tags:
 430         media_tag_string = mg_globals.app_config['tags_delimiter'].join(
 431                                       [tag['name'] for tag in media_entry_tags])
 432     return media_tag_string
 433
 434 TOO_LONG_TAG_WARNING = \
 435     u'Tags must be shorter than %s characters.  Tags that are too long: %s'
 436
 437 def tag_length_validator(form, field):
 438     """
 439     Make sure tags do not exceed the maximum tag length.
 440     """
 441     tags = convert_to_tag_list_of_dicts(field.data)
 442     too_long_tags = [
 443         tag['name'] for tag in tags
 444         if len(tag['name']) > mg_globals.app_config['tags_max_length']]
 445
 446     if too_long_tags:
 447         raise wtforms.ValidationError(
 448             TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], \
 449                                     ', '.join(too_long_tags)))
 450
 451
 452 MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
 453
 454 def cleaned_markdown_conversion(text):
 455     """
 456     Take a block of text, run it through MarkDown, and clean its HTML.
 457     """
 458     # Markdown will do nothing with and clean_html can do nothing with
 459     # an empty string :)
 460     if not text:
 461         return u''
 462
 463     return clean_html(MARKDOWN_INSTANCE.convert(text))
 464
 465
 466 SETUP_GETTEXTS = {}
 467
 468 def setup_gettext(locale):
 469     """
 470     Setup the gettext instance based on this locale
 471     """
 472     # Later on when we have plugins we may want to enable the
 473     # multi-translations system they have so we can handle plugin
 474     # translations too
 475
 476     # TODO: fallback nicely on translations from pt_PT to pt if not
 477     # available, etc.
 478     if SETUP_GETTEXTS.has_key(locale):
 479         this_gettext = SETUP_GETTEXTS[locale]
 480     else:
 481         this_gettext = gettext.translation(
 482             'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
 483         if exists(locale):
 484             SETUP_GETTEXTS[locale] = this_gettext
 485
 486     mg_globals.setup_globals(
 487         translations=this_gettext)
 488
 489
 490 # Force en to be setup before anything else so that
 491 # mg_globals.translations is never None
 492 setup_gettext('en')
 493
 494
 495 def pass_to_ugettext(*args, **kwargs):
 496     """
 497     Pass a translation on to the appropriate ugettext method.
 498
 499     The reason we can't have a global ugettext method is because
 500     mg_globals gets swapped out by the application per-request.
 501     """
 502     return mg_globals.translations.ugettext(
 503         *args, **kwargs)
 504
 505
 506 def lazy_pass_to_ugettext(*args, **kwargs):
 507     """
 508     Lazily pass to ugettext.
 509
 510     This is useful if you have to define a translation on a module
 511     level but you need it to not translate until the time that it's
 512     used as a string.
 513     """
 514     return LazyProxy(pass_to_ugettext, *args, **kwargs)
 515
 516
 517 def pass_to_ngettext(*args, **kwargs):
 518     """
 519     Pass a translation on to the appropriate ngettext method.
 520
 521     The reason we can't have a global ngettext method is because
 522     mg_globals gets swapped out by the application per-request.
 523     """
 524     return mg_globals.translations.ngettext(
 525         *args, **kwargs)
 526
 527
 528 def lazy_pass_to_ngettext(*args, **kwargs):
 529     """
 530     Lazily pass to ngettext.
 531
 532     This is useful if you have to define a translation on a module
 533     level but you need it to not translate until the time that it's
 534     used as a string.
 535     """
 536     return LazyProxy(pass_to_ngettext, *args, **kwargs)
 537
 538
 539 def fake_ugettext_passthrough(string):
 540     """
 541     Fake a ugettext call for extraction's sake ;)
 542
 543     In wtforms there's a separate way to define a method to translate
 544     things... so we just need to mark up the text so that it can be
 545     extracted, not so that it's actually run through gettext.
 546     """
 547     return string
 548
 549
 550 PAGINATION_DEFAULT_PER_PAGE = 30
 551
 552 class Pagination(object):
 553     """
 554     Pagination class for mongodb queries.
 555
 556     Initialization through __init__(self, cursor, page=1, per_page=2),
 557     get actual data slice through __call__().
 558     """
 559
 560     def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
 561                  jump_to_id=False):
 562         """
 563         Initializes Pagination
 564
 565         Args:
 566          - page: requested page
 567          - per_page: number of objects per page
 568          - cursor: db cursor
 569          - jump_to_id: ObjectId, sets the page to the page containing the object
 570            with _id == jump_to_id.
 571         """
 572         self.page = page
 573         self.per_page = per_page
 574         self.cursor = cursor
 575         self.total_count = self.cursor.count()
 576         self.active_id = None
 577
 578         if jump_to_id:
 579             cursor = copy.copy(self.cursor)
 580
 581             for (doc, increment) in izip(cursor, count(0)):
 582                 if doc['_id'] == jump_to_id:
 583                     self.page = 1 + int(floor(increment / self.per_page))
 584
 585                     self.active_id = jump_to_id
 586                     break
 587
 588
 589     def __call__(self):
 590         """
 591         Returns slice of objects for the requested page
 592         """
 593         return self.cursor.skip(
 594             (self.page - 1) * self.per_page).limit(self.per_page)
 595
 596     @property
 597     def pages(self):
 598         return int(ceil(self.total_count / float(self.per_page)))
 599
 600     @property
 601     def has_prev(self):
 602         return self.page > 1
 603
 604     @property
 605     def has_next(self):
 606         return self.page < self.pages
 607
 608     def iter_pages(self, left_edge=2, left_current=2,
 609                    right_current=5, right_edge=2):
 610         last = 0
 611         for num in xrange(1, self.pages + 1):
 612             if num <= left_edge or \
 613                (num > self.page - left_current - 1 and \
 614                 num < self.page + right_current) or \
 615                num > self.pages - right_edge:
 616                 if last + 1 != num:
 617                     yield None
 618                 yield num
 619                 last = num
 620
 621     def get_page_url_explicit(self, base_url, get_params, page_no):
 622         """
 623         Get a page url by adding a page= parameter to the base url
 624         """
 625         new_get_params = copy.copy(get_params or {})
 626         new_get_params['page'] = page_no
 627         return "%s?%s" % (
 628             base_url, urllib.urlencode(new_get_params))
 629
 630     def get_page_url(self, request, page_no):
 631         """
 632         Get a new page url based of the request, and the new page number.
 633
 634         This is a nice wrapper around get_page_url_explicit()
 635         """
 636         return self.get_page_url_explicit(
 637             request.path_info, request.GET, page_no)
 638
 639
 640 def gridify_list(this_list, num_cols=5):
 641     """
 642     Generates a list of lists where each sub-list's length depends on
 643     the number of columns in the list
 644     """
 645     grid = []
 646
 647     # Figure out how many rows we should have
 648     num_rows = int(ceil(float(len(this_list)) / num_cols))
 649
 650     for row_num in range(num_rows):
 651         slice_min = row_num * num_cols
 652         slice_max = (row_num + 1) * num_cols
 653
 654         row = this_list[slice_min:slice_max]
 655
 656         grid.append(row)
 657
 658     return grid
 659
 660
 661 def gridify_cursor(this_cursor, num_cols=5):
 662     """
 663     Generates a list of lists where each sub-list's length depends on
 664     the number of columns in the list
 665     """
 666     return gridify_list(list(this_cursor), num_cols)
 667
 668
 669 def render_404(request):
 670     """
 671     Render a 404.
 672     """
 673     return render_to_response(
 674         request, 'mediagoblin/404.html', {}, status=400)