| 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
| 2 | # Copyright (C) 2011 Free Software Foundation, Inc |
| 3 | # |
| 4 | # This program is free software: you can redistribute it and/or modify |
| 5 | # it under the terms of the GNU Affero General Public License as published by |
| 6 | # the Free Software Foundation, either version 3 of the License, or |
| 7 | # (at your option) any later version. |
| 8 | # |
| 9 | # This program is distributed in the hope that it will be useful, |
| 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | # GNU Affero General Public License for more details. |
| 13 | # |
| 14 | # You should have received a copy of the GNU Affero General Public License |
| 15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 16 | |
| 17 | from __future__ import division |
| 18 | |
| 19 | from email.MIMEText import MIMEText |
| 20 | import gettext |
| 21 | import pkg_resources |
| 22 | import smtplib |
| 23 | import sys |
| 24 | import re |
| 25 | import urllib |
| 26 | from math import ceil, floor |
| 27 | import copy |
| 28 | |
| 29 | from babel.localedata import exists |
| 30 | import jinja2 |
| 31 | import translitcodec |
| 32 | from webob import Response, exc |
| 33 | from lxml.html.clean import Cleaner |
| 34 | import markdown |
| 35 | |
| 36 | from mediagoblin import mg_globals |
| 37 | from mediagoblin import messages |
| 38 | from mediagoblin.db.util import ObjectId |
| 39 | |
| 40 | from itertools import izip, count |
| 41 | |
# NOTE(review): presumably the order in which stored image sizes are tried
# when picking one to display -- nothing in this file reads the list, so
# confirm against the code that consumes it.
DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']
| 43 | |
# Module-wide testing switch; stays False until _activate_testing() runs.
TESTS_ENABLED = False


def _activate_testing():
    """
    Flip util.py into testing mode by setting TESTS_ENABLED to True.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True
| 51 | |
| 52 | |
def clear_test_buckets():
    """
    Reset everything this module accumulates for testing purposes.

    Call this to get a "clean slate" before the next round of tests:
    the cached jinja envs, both email test inboxes and the recorded
    template contexts are all rebound to fresh, empty containers.
    """
    global SETUP_JINJA_ENVS
    SETUP_JINJA_ENVS = {}

    # Rebinds EMAIL_TEST_INBOX and EMAIL_TEST_MBOX_INBOX to new empty lists.
    _clear_test_inboxes()

    clear_test_template_context()
| 71 | |
| 72 | |
# Jinja environments that have already been built, keyed by locale.
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Set up the Jinja environment for a locale, reusing a cached one
    when available.

    (In the future we may have another system for providing theming;
    for now this is good enough.)

    Args:
     - template_loader: jinja2 loader given to a newly built environment
     - locale: locale handed to setup_gettext() and used as the cache key

    Returns the jinja2 environment for this locale.

    Note that the cache is keyed by locale only: on a cache hit the
    environment keeps the loader it was first built with and
    template_loader is ignored.
    """
    # Done on every call, cache hit or not: this is what points
    # mg_globals.translations at the requested locale.
    setup_gettext(locale)

    # If we have a jinja environment set up with this locale, just
    # return that one.
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader, autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    template_env.install_gettext_callables(
        mg_globals.translations.gettext,
        mg_globals.translations.ngettext)

    # All templates will know how to ...
    # ... fetch all waiting messages and remove them from the queue
    template_env.globals['fetch_messages'] = messages.fetch_messages

    # Only environments for locales babel knows about are cached.
    # NOTE(review): presumably so arbitrary locale strings cannot grow
    # the cache without bound -- confirm.
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env
| 106 | |
| 107 | |
# Filled in by render_template() while tests are enabled: maps a template
# path to the context that template was last rendered with.
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Fetch template_path from the request's template environment and
    render it with context.

    The request is always added to context under the 'request' key
    (the dict passed in is modified), so callers don't have to.  While
    tests are enabled the context is also recorded in
    TEMPLATE_TEST_CONTEXT under template_path.

    Returns the rendered template.
    """
    environment = request.template_env
    template = environment.get_template(template_path)

    context['request'] = request
    output = template.render(context)

    if TESTS_ENABLED:
        TEMPLATE_TEST_CONTEXT[template_path] = context

    return output
| 128 | |
| 129 | |
def clear_test_template_context():
    """
    Forget every template context recorded while testing.

    TEMPLATE_TEST_CONTEXT is rebound to a brand new empty dict; the
    previous dict object itself is left untouched.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = {}
| 133 | |
| 134 | |
def render_to_response(request, template, context):
    """
    Render template with context via render_template() and wrap the
    result in a Response.  Much like Django's shortcut.render()
    """
    rendered = render_template(request, template, context)
    return Response(rendered)
| 138 | |
| 139 | |
def redirect(request, *args, **kwargs):
    """
    Returns a HTTPFound(), takes a request and then urlgen params

    Args:
     - request: request whose urlgen() builds the target url
     - *args, **kwargs: passed straight on to request.urlgen()
     - querystring: optional keyword; when truthy it is appended
       verbatim to the generated url.  It is never passed to urlgen().
    """
    # Always remove 'querystring' from the urlgen parameters.  A falsy
    # value ('' or None) used to stay in kwargs and was handed to
    # urlgen() as if it were a url parameter.
    querystring = kwargs.pop('querystring', None)

    location = request.urlgen(*args, **kwargs)
    if querystring:
        location = ''.join([location, querystring])

    return exc.HTTPFound(location=location)
| 152 | |
| 153 | |
def setup_user_in_request(request):
    """
    Examine a request and tack on a request.user parameter if that's
    appropriate.

    With no 'user_id' in the session, request.user is set to None.
    Otherwise the user is looked up by that id and request.user is set
    to whatever the lookup returned; when that result is falsy the
    session is invalidated as well.
    """
    # `in` instead of the deprecated has_key().
    if 'user_id' not in request.session:
        request.user = None
        return

    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user
| 173 | |
| 174 | |
def import_component(import_string):
    """
    Resolve an import string to the object it names.  Probably a
    method, class, or global variable.

    Args:
     - import_string: a string that defines what to import.  Written
       in the format of "module1.module2:component"

    Returns the attribute called "component" of the imported module.
    """
    module_path, attribute = import_string.split(':', 1)

    # Import for the side effect only and read the module back from
    # sys.modules under its full dotted name.
    __import__(module_path)
    return getattr(sys.modules[module_path], attribute)
| 189 | |
# Runs of whitespace/punctuation that separate the words of a slug.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/

    text is lowercased and split on _punct_re; every piece is encoded
    with the 'translit/long' codec, pieces that come out empty are
    dropped and the rest are joined with delim.
    """
    encoded_words = (
        piece.encode('translit/long')
        for piece in _punct_re.split(text.lower()))
    return unicode(delim.join([word for word in encoded_words if word]))
| 202 | |
| 203 | ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 204 | ### Special email test stuff begins HERE |
| 205 | ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 206 | |
| 207 | # We have two "test inboxes" here: |
| 208 | # |
| 209 | # EMAIL_TEST_INBOX: |
| 210 | # ---------------- |
| 211 | # If you're writing test views, you'll probably want to check this. |
| 212 | # It contains a list of MIMEText messages. |
| 213 | # |
| 214 | # EMAIL_TEST_MBOX_INBOX: |
| 215 | # ---------------------- |
| 216 | # This collects the messages from the FakeMhost inbox. It's really |
| 217 | # just here for testing the send_email method itself. |
| 218 | # |
| 219 | # Anyway this contains: |
| 220 | # - from |
| 221 | # - to: a list of email recipient addresses |
| 222 | # - message: not just the body, but the whole message, including |
| 223 | # headers, etc. |
| 224 | # |
| 225 | # ***IMPORTANT!*** |
| 226 | # ---------------- |
| 227 | # Before running tests that call functions which send email, you should |
| 228 | # always call _clear_test_inboxes() to "wipe" the inboxes clean. |
| 229 | |
EMAIL_TEST_INBOX = []
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Stand-in mail host: instead of delivering anything it records what
    send_email hands it, so the messages can be captured and tested.
    """
    def connect(self):
        """Do nothing; there is no server to connect to."""
        pass

    def sendmail(self, from_addr, to_addrs, message):
        """
        Append a dict with the 'from', 'to' and 'message' of this
        delivery to EMAIL_TEST_MBOX_INBOX.
        """
        delivery = {
            'from': from_addr,
            'to': to_addrs,
            'message': message}
        EMAIL_TEST_MBOX_INBOX.append(delivery)
| 247 | |
def _clear_test_inboxes():
    """
    Wipe both email test inboxes by rebinding EMAIL_TEST_INBOX and
    EMAIL_TEST_MBOX_INBOX to new empty lists.
    """
    global EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []
| 253 | |
| 254 | ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 255 | ### </Special email test stuff> |
| 256 | ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 257 | |
def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    A FakeMhost is used instead of a real SMTP connection when tests
    are enabled or the 'email_debug_mode' app config option is set.
    While tests are enabled the built message is also appended to
    EMAIL_TEST_INBOX; in debug mode it is printed as well.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns whatever the mail host's sendmail() returns.
    """
    debug_mode = mg_globals.app_config['email_debug_mode']
    use_fake_host = TESTS_ENABLED or debug_mode

    message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
    message['Subject'] = subject
    message['From'] = from_addr
    message['To'] = ', '.join(to_addrs)

    if TESTS_ENABLED:
        EMAIL_TEST_INBOX.append(message)

    if debug_mode:
        # Single parenthesised argument: prints the same thing whether
        # print is a statement or a function.
        print(u"===== Email =====")
        print(u"From address: %s" % message['From'])
        print(u"To addresses: %s" % message['To'])
        print(u"Subject: %s" % message['Subject'])
        print(u"-- Body: --")
        print(message.get_payload(decode=True))

    if use_fake_host:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP()

    mhost.connect()
    try:
        return mhost.sendmail(from_addr, to_addrs, message.as_string())
    finally:
        # A real SMTP connection used to be left open after sending.
        # FakeMhost has nothing to close.
        if not use_fake_host:
            try:
                mhost.quit()
            except smtplib.SMTPException:
                # The server is already gone; just drop the socket so
                # the original error (if any) is not masked.
                mhost.close()
| 294 | |
| 295 | |
| 296 | ################### |
| 297 | # Translation tools |
| 298 | ################### |
| 299 | |
| 300 | |
# Filesystem path of the 'translations' resource of the 'mediagoblin'
# package, as resolved by pkg_resources.  setup_gettext() passes it to
# gettext.translation() as the directory to search for catalogs.
TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin', 'translations')
| 303 | |
| 304 | |
def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The locale is split once on its first '-' (or, if it has no '-',
    on its first '_'); the part before is lowercased, the part after
    uppercased, and the two are joined with '_'.  A locale with
    neither separator is simply lowercased.
    """
    # '-' is checked first so a locale containing both separators is
    # split on the hyphen.
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()
| 317 | |
| 318 | |
def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    The whole locale is lowercased and its first '_' (if any) is
    replaced with '-'.
    """
    # Lowercasing leaves '_' alone, so this matches splitting on the
    # first '_' and re-joining the lowercased halves with '-'.
    return locale.lower().replace('_', '-', 1)
| 328 | |
| 329 | |
def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request

    Checked in this order, first hit wins:
     - a 'lang' field in request.GET (request.POST is only consulted
       when request.GET is empty)
     - a 'locale' entry in the routing matchdict
     - 'target_lang' stored in the session
     - the first entry of request.accept_language.best_matches()
     - 'en'

    Returns the chosen locale as formatted by locale_to_lower_upper().
    """
    request_form = request.GET or request.POST

    # `in` instead of the deprecated has_key() throughout.
    if 'lang' in request_form:
        return locale_to_lower_upper(request_form['lang'])

    accept_lang_matches = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    if 'locale' in request.matchdict:
        target_lang = request.matchdict['locale']
    elif 'target_lang' in request.session:
        target_lang = request.session['target_lang']
    # Pull the first acceptable language
    elif accept_lang_matches:
        target_lang = accept_lang_matches[0]
    # Fall back to English
    else:
        target_lang = 'en'

    return locale_to_lower_upper(target_lang)
| 355 | |
| 356 | |
# A super strict version of the lxml.html cleaner class.
# Every boolean cleaning option below is switched on and allow_tags lists
# the only tags named as allowed; clean_html() runs its input through this
# single shared instance.
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False, # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True, # for now
    host_whitelist=(),
    whitelist_tags=set([]))
| 377 | |
| 378 | |
def clean_html(html):
    """
    Run html through HTML_CLEANER and return the result.

    Falsy input (such as an empty string) is answered with u'' without
    touching the cleaner, because clean_html barfs on an empty string.
    """
    return HTML_CLEANER.clean_html(html) if html else u''
| 385 | |
| 386 | |
# Single Markdown converter shared by every cleaned_markdown_conversion()
# call; safe_mode='escape' is passed through to the markdown library.
# NOTE(review): presumably this makes raw HTML in the input come out escaped
# rather than passed through -- confirm against the markdown version in use.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
| 388 | |
| 389 | |
def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through MarkDown, and clean its HTML.

    Returns u'' for falsy input, otherwise the converted text after it
    has been passed through clean_html().
    """
    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string :)
    if not text:
        return u''

    # MARKDOWN_INSTANCE is shared between calls; the markdown library
    # asks for reset() between documents so that parser state collected
    # for one text does not carry over into the next conversion.
    MARKDOWN_INSTANCE.reset()

    return clean_html(MARKDOWN_INSTANCE.convert(text))
| 400 | |
| 401 | |
# gettext translation objects that have already been loaded, keyed by locale.
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    The translation object for the 'mediagoblin' domain is taken from
    SETUP_GETTEXTS when present, otherwise loaded from
    TRANSLATIONS_PATH (with fallback=True) and cached if babel knows
    the locale.  Either way it is installed through
    mg_globals.setup_globals(translations=...).
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    # `in` instead of the deprecated has_key().
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)
| 424 | |
| 425 | |
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through __init__(self, page, cursor,
    per_page=PAGINATION_DEFAULT_PER_PAGE, jump_to_id=False),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
                 jump_to_id=False):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - per_page: number of objects per page
         - cursor: db cursor
         - jump_to_id: ObjectId, sets the page to the page containing the object
           with _id == jump_to_id.
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()
        # Set to jump_to_id only when that object is actually found.
        self.active_id = None

        if jump_to_id:
            # Walk a copy so the cursor handed out by __call__ is not
            # the one iterated here.
            cursor = copy.copy(self.cursor)

            for position, doc in enumerate(cursor):
                if doc['_id'] == jump_to_id:
                    # position is 0-based, pages are 1-based.
                    self.page = 1 + int(position // self.per_page)

                    self.active_id = jump_to_id
                    break

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages, the last one possibly partial."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True when there is a page before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True when there is a page after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pager, with None marking
        each gap of skipped pages.

        Shown are the first left_edge pages, the last right_edge pages
        and a window around the current page reaching left_current
        pages back and right_current - 1 pages forward.
        """
        # The property recomputes on every access; read it once.
        total_pages = self.pages
        last = 0
        for num in range(1, total_pages + 1):
            near_start = num <= left_edge
            near_current = (num > self.page - left_current - 1 and
                            num < self.page + right_current)
            near_end = num > total_pages - right_edge

            if near_start or near_current or near_end:
                if last + 1 != num:
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url
        """
        # Work on a copy so the caller's parameters are not modified.
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)