mediagoblin/util.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011 Free Software Foundation, Inc
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 from email.MIMEText import MIMEText
  18 import gettext
  19 import pkg_resources
  20 import smtplib
  21 import sys
  22 import re
  23 import urllib
  24 from math import ceil
  25 from string import strip
  26 import copy
  27 import wtforms
  28
  29 from babel.localedata import exists
  30 import jinja2
  31 import translitcodec
  32 from webob import Response, exc
  33 from lxml.html.clean import Cleaner
  34 import markdown
  35
  36 from mediagoblin import mg_globals
  37 from mediagoblin import messages
  38 from mediagoblin.db.util import ObjectId
  39
# Module-level switch.  While it is true, render_template() stores the
# contexts it renders and send_email() captures messages instead of
# talking to a real mail host.
TESTS_ENABLED = False
def _activate_testing():
    """
    Call this to activate testing in util.py

    Rebinds the module-level TESTS_ENABLED flag to True.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True
  47
  48
  49 def clear_test_buckets():
  50     """
  51     We store some things for testing purposes that should be cleared
  52     when we want a "clean slate" of information for our next round of
  53     tests.  Call this function to wipe all that stuff clean.
  54
  55     Also wipes out some other things we might redefine during testing,
  56     like the jinja envs.
  57     """
  58     global SETUP_JINJA_ENVS
  59     SETUP_JINJA_ENVS = {}
  60
  61     global EMAIL_TEST_INBOX
  62     global EMAIL_TEST_MBOX_INBOX
  63     EMAIL_TEST_INBOX = []
  64     EMAIL_TEST_MBOX_INBOX = []
  65
  66     clear_test_template_context()
  67
  68
  69 SETUP_JINJA_ENVS = {}
  70
  71
  72 def get_jinja_env(template_loader, locale):
  73     """
  74     Set up the Jinja environment,
  75
  76     (In the future we may have another system for providing theming;
  77     for now this is good enough.)
  78     """
  79     setup_gettext(locale)
  80
  81     # If we have a jinja environment set up with this locale, just
  82     # return that one.
  83     if SETUP_JINJA_ENVS.has_key(locale):
  84         return SETUP_JINJA_ENVS[locale]
  85
  86     template_env = jinja2.Environment(
  87         loader=template_loader, autoescape=True,
  88         extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])
  89
  90     template_env.install_gettext_callables(
  91         mg_globals.translations.gettext,
  92         mg_globals.translations.ngettext)
  93
  94     # All templates will know how to ...
  95     # ... fetch all waiting messages and remove them from the queue
  96     template_env.globals['fetch_messages'] = messages.fetch_messages
  97
  98     if exists(locale):
  99         SETUP_JINJA_ENVS[locale] = template_env
 100
 101     return template_env
 102
 103
 104 # We'll store context information here when doing unit tests
 105 TEMPLATE_TEST_CONTEXT = {}
 106
 107
 108 def render_template(request, template_path, context):
 109     """
 110     Render a template with context.
 111
 112     Always inserts the request into the context, so you don't have to.
 113     Also stores the context if we're doing unit tests.  Helpful!
 114     """
 115     template = request.template_env.get_template(
 116         template_path)
 117     context['request'] = request
 118     rendered = template.render(context)
 119
 120     if TESTS_ENABLED:
 121         TEMPLATE_TEST_CONTEXT[template_path] = context
 122
 123     return rendered
 124
 125
def clear_test_template_context():
    """
    Forget every template context stored by render_template().

    Rebinds TEMPLATE_TEST_CONTEXT to a new, empty dict.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = {}
 129
 130
 131 def render_to_response(request, template, context):
 132     """Much like Django's shortcut.render()"""
 133     return Response(render_template(request, template, context))
 134
 135
 136 def redirect(request, *args, **kwargs):
 137     """Returns a HTTPFound(), takes a request and then urlgen params"""
 138     return exc.HTTPFound(location=request.urlgen(*args, **kwargs))
 139
 140
 141 def setup_user_in_request(request):
 142     """
 143     Examine a request and tack on a request.user parameter if that's
 144     appropriate.
 145     """
 146     if not request.session.has_key('user_id'):
 147         request.user = None
 148         return
 149
 150     user = None
 151     user = request.app.db.User.one(
 152         {'_id': ObjectId(request.session['user_id'])})
 153
 154     if not user:
 155         # Something's wrong... this user doesn't exist?  Invalidate
 156         # this session.
 157         request.session.invalidate()
 158
 159     request.user = user
 160
 161
 162 def import_component(import_string):
 163     """
 164     Import a module component defined by STRING.  Probably a method,
 165     class, or global variable.
 166
 167     Args:
 168      - import_string: a string that defines what to import.  Written
 169        in the format of "module1.module2:component"
 170     """
 171     module_name, func_name = import_string.split(':', 1)
 172     __import__(module_name)
 173     module = sys.modules[module_name]
 174     func = getattr(module, func_name)
 175     return func
 176
 177 _punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
 178
 179 def slugify(text, delim=u'-'):
 180     """
 181     Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/
 182     """
 183     result = []
 184     for word in _punct_re.split(text.lower()):
 185         word = word.encode('translit/long')
 186         if word:
 187             result.append(word)
 188     return unicode(delim.join(result))
 189
 190 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 191 ### Special email test stuff begins HERE
 192 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 193
 194 # We have two "test inboxes" here:
 195 #
 196 # EMAIL_TEST_INBOX:
 197 # ----------------
 198 #   If you're writing test views, you'll probably want to check this.
 199 #   It contains a list of MIMEText messages.
 200 #
 201 # EMAIL_TEST_MBOX_INBOX:
 202 # ----------------------
#   This collects the messages from the FakeMhost inbox.  It's really
 204 #   just here for testing the send_email method itself.
 205 #
 206 #   Anyway this contains:
 207 #    - from
 208 #    - to: a list of email recipient addresses
 209 #    - message: not just the body, but the whole message, including
 210 #      headers, etc.
 211 #
 212 # ***IMPORTANT!***
 213 # ----------------
 214 # Before running tests that call functions which send email, you should
 215 # always call _clear_test_inboxes() to "wipe" the inboxes clean.
 216
 217 EMAIL_TEST_INBOX = []
 218 EMAIL_TEST_MBOX_INBOX = []
 219
 220
 221 class FakeMhost(object):
 222     """
 223     Just a fake mail host so we can capture and test messages
 224     from send_email
 225     """
 226     def connect(self):
 227         pass
 228
 229     def sendmail(self, from_addr, to_addrs, message):
 230         EMAIL_TEST_MBOX_INBOX.append(
 231             {'from': from_addr,
 232              'to': to_addrs,
 233              'message': message})
 234
 235 def _clear_test_inboxes():
 236     global EMAIL_TEST_INBOX
 237     global EMAIL_TEST_MBOX_INBOX
 238     EMAIL_TEST_INBOX = []
 239     EMAIL_TEST_MBOX_INBOX = []
 240
 241 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 242 ### </Special email test stuff>
 243 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 244
 245 def send_email(from_addr, to_addrs, subject, message_body):
 246     """
 247     Simple email sending wrapper, use this so we can capture messages
 248     for unit testing purposes.
 249
 250     Args:
 251      - from_addr: address you're sending the email from
 252      - to_addrs: list of recipient email addresses
 253      - subject: subject of the email
 254      - message_body: email body text
 255     """
 256     # TODO: make a mock mhost if testing is enabled
 257     if TESTS_ENABLED or mg_globals.email_debug_mode:
 258         mhost = FakeMhost()
 259     elif not mg_globals.email_debug_mode:
 260         mhost = smtplib.SMTP()
 261
 262     mhost.connect()
 263
 264     message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
 265     message['Subject'] = subject
 266     message['From'] = from_addr
 267     message['To'] = ', '.join(to_addrs)
 268
 269     if TESTS_ENABLED:
 270         EMAIL_TEST_INBOX.append(message)
 271
 272     if getattr(mg_globals, 'email_debug_mode', False):
 273         print u"===== Email ====="
 274         print u"From address: %s" % message['From']
 275         print u"To addresses: %s" % message['To']
 276         print u"Subject: %s" % message['Subject']
 277         print u"-- Body: --"
 278         print message.get_payload(decode=True)
 279
 280     return mhost.sendmail(from_addr, to_addrs, message.as_string())
 281
 282
 283 ###################
 284 # Translation tools
 285 ###################
 286
 287
# Filesystem path of the 'translations' resource inside the mediagoblin
# package.  setup_gettext() hands it to gettext.translation() as the
# directory to look for catalogs in.
TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin', 'translations')
 290
 291
 292 def locale_to_lower_upper(locale):
 293     """
 294     Take a locale, regardless of style, and format it like "en-us"
 295     """
 296     if '-' in locale:
 297         lang, country = locale.split('-', 1)
 298         return '%s_%s' % (lang.lower(), country.upper())
 299     elif '_' in locale:
 300         lang, country = locale.split('_', 1)
 301         return '%s_%s' % (lang.lower(), country.upper())
 302     else:
 303         return locale.lower()
 304
 305
 306 def locale_to_lower_lower(locale):
 307     """
 308     Take a locale, regardless of style, and format it like "en_US"
 309     """
 310     if '_' in locale:
 311         lang, country = locale.split('_', 1)
 312         return '%s-%s' % (lang.lower(), country.lower())
 313     else:
 314         return locale.lower()
 315
 316
 317 def get_locale_from_request(request):
 318     """
 319     Figure out what target language is most appropriate based on the
 320     request
 321     """
 322     request_form = request.GET or request.POST
 323
 324     if request_form.has_key('lang'):
 325         return locale_to_lower_upper(request_form['lang'])
 326
 327     accept_lang_matches = request.accept_language.best_matches()
 328
 329     # Your routing can explicitly specify a target language
 330     if request.matchdict.has_key('locale'):
 331         target_lang = request.matchdict['locale']
 332     elif request.session.has_key('target_lang'):
 333         target_lang = request.session['target_lang']
 334     # Pull the first acceptable language
 335     elif accept_lang_matches:
 336         target_lang = accept_lang_matches[0]
 337     # Fall back to English
 338     else:
 339         target_lang = 'en'
 340
 341     return locale_to_lower_upper(target_lang)
 342
 343
# A super strict version of the lxml.html cleaner class
#
# One shared, module-level instance; clean_html() passes its input
# through it.  Every removal option is switched on and allow_tags lists
# the only tags that are let through.
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False, # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True, # for now
    host_whitelist=(),
    whitelist_tags=set([]))
 364
 365
 366 def clean_html(html):
 367     # clean_html barfs on an empty string
 368     if not html:
 369         return u''
 370
 371     return HTML_CLEANER.clean_html(html)
 372
 373
 374 def convert_to_tag_list_of_dicts(tag_string):
 375     """
 376     Filter input from incoming string containing user tags,
 377
 378     Strips trailing, leading, and internal whitespace, and also converts
 379     the "tags" text into an array of tags
 380     """
 381     taglist = []
 382     if tag_string:
 383
 384         # Strip out internal, trailing, and leading whitespace
 385         stripped_tag_string = u' '.join(tag_string.strip().split())
 386
 387         # Split the tag string into a list of tags
 388         for tag in stripped_tag_string.split(
 389                                        mg_globals.app_config['tags_delimiter']):
 390
 391             # Ignore empty or duplicate tags
 392             if tag.strip() and tag.strip() not in [t['name'] for t in taglist]:
 393
 394                 if mg_globals.app_config['tags_case_sensitive']:
 395                     taglist.append({'name': tag.strip(),
 396                                     'slug': slugify(tag.strip())})
 397                 else:
 398                     taglist.append({'name': tag.strip().lower(),
 399                                     'slug': slugify(tag.strip().lower())})
 400     return taglist
 401
 402
 403 def media_tags_as_string(media_entry_tags):
 404     """
 405     Generate a string from a media item's tags, stored as a list of dicts
 406
 407     This is the opposite of convert_to_tag_list_of_dicts
 408     """
 409     media_tag_string = ''
 410     if media_entry_tags:
 411         media_tag_string = mg_globals.app_config['tags_delimiter'].join(
 412                                       [tag['name'] for tag in media_entry_tags])
 413     return media_tag_string
 414
 415 TOO_LONG_TAG_WARNING = \
 416     u'Tags must be shorter than %s characters.  Tags that are too long: %s'
 417
 418 def tag_length_validator(form, field):
 419     """
 420     Make sure tags do not exceed the maximum tag length.
 421     """
 422     tags = convert_to_tag_list_of_dicts(field.data)
 423     too_long_tags = [
 424         tag['name'] for tag in tags
 425         if len(tag['name']) > mg_globals.app_config['tags_max_length']]
 426
 427     if too_long_tags:
 428         raise wtforms.ValidationError(
 429             TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], \
 430                                     ', '.join(too_long_tags)))
 431
 432
# Shared converter used by cleaned_markdown_conversion(); it is created
# with safe_mode='escape'.
# NOTE(review): safe_mode was deprecated in Python-Markdown 2.5 and
# removed in 3.0 -- confirm which Markdown version is pinned.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
 434
 435 def cleaned_markdown_conversion(text):
 436     """
 437     Take a block of text, run it through MarkDown, and clean its HTML.
 438     """
 439     # Markdown will do nothing with and clean_html can do nothing with
 440     # an empty string :)
 441     if not text:
 442         return u''
 443
 444     return clean_html(MARKDOWN_INSTANCE.convert(text))
 445
 446
 447 SETUP_GETTEXTS = {}
 448
 449 def setup_gettext(locale):
 450     """
 451     Setup the gettext instance based on this locale
 452     """
 453     # Later on when we have plugins we may want to enable the
 454     # multi-translations system they have so we can handle plugin
 455     # translations too
 456
 457     # TODO: fallback nicely on translations from pt_PT to pt if not
 458     # available, etc.
 459     if SETUP_GETTEXTS.has_key(locale):
 460         this_gettext = SETUP_GETTEXTS[locale]
 461     else:
 462         this_gettext = gettext.translation(
 463             'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
 464         if exists(locale):
 465             SETUP_GETTEXTS[locale] = this_gettext
 466
 467     mg_globals.setup_globals(
 468         translations=this_gettext)
 469
 470
 471 PAGINATION_DEFAULT_PER_PAGE = 30
 472
 473 class Pagination(object):
 474     """
 475     Pagination class for mongodb queries.
 476
 477     Initialization through __init__(self, cursor, page=1, per_page=2),
 478     get actual data slice through __call__().
 479     """
 480
 481     def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE):
 482         """
 483         Initializes Pagination
 484
 485         Args:
 486          - page: requested page
 487          - per_page: number of objects per page
 488          - cursor: db cursor
 489         """
 490         self.page = page
 491         self.per_page = per_page
 492         self.cursor = cursor
 493         self.total_count = self.cursor.count()
 494
 495     def __call__(self):
 496         """
 497         Returns slice of objects for the requested page
 498         """
 499         return self.cursor.skip(
 500             (self.page - 1) * self.per_page).limit(self.per_page)
 501
 502     @property
 503     def pages(self):
 504         return int(ceil(self.total_count / float(self.per_page)))
 505
 506     @property
 507     def has_prev(self):
 508         return self.page > 1
 509
 510     @property
 511     def has_next(self):
 512         return self.page < self.pages
 513
 514     def iter_pages(self, left_edge=2, left_current=2,
 515                    right_current=5, right_edge=2):
 516         last = 0
 517         for num in xrange(1, self.pages + 1):
 518             if num <= left_edge or \
 519                (num > self.page - left_current - 1 and \
 520                 num < self.page + right_current) or \
 521                num > self.pages - right_edge:
 522                 if last + 1 != num:
 523                     yield None
 524                 yield num
 525                 last = num
 526
 527     def get_page_url_explicit(self, base_url, get_params, page_no):
 528         """
 529         Get a page url by adding a page= parameter to the base url
 530         """
 531         new_get_params = copy.copy(get_params or {})
 532         new_get_params['page'] = page_no
 533         return "%s?%s" % (
 534             base_url, urllib.urlencode(new_get_params))
 535
 536     def get_page_url(self, request, page_no):
 537         """
 538         Get a new page url based of the request, and the new page number.
 539
 540         This is a nice wrapper around get_page_url_explicit()
 541         """
 542         return self.get_page_url_explicit(
 543             request.path_info, request.GET, page_no)