Only migrate to description_html if description also exists.
[mediagoblin.git] / mediagoblin / util.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011 Free Software Foundation, Inc
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 from email.MIMEText import MIMEText
18 import gettext
19 import pkg_resources
20 import smtplib
21 import sys
22 import re
23 import urllib
24 from math import ceil
25 import copy
26
27 from babel.localedata import exists
28 import jinja2
29 import translitcodec
30 from webob import Response, exc
31 from lxml.html.clean import Cleaner
32 import markdown
33
34 from mediagoblin import mg_globals
35 from mediagoblin.db.util import ObjectId
36
# Module-wide flag consulted by render_template() and send_email(): while
# it is True they record what they render / send so tests can inspect it.
TESTS_ENABLED = False


def _activate_testing():
    """
    Call this to activate testing in util.py

    Sets the module-level TESTS_ENABLED flag to True.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True
44
45
def clear_test_buckets():
    """
    Reset the module-level state that tests may have filled or replaced.

    Gives the next round of tests a "clean slate": the cached jinja
    environments, both email test inboxes and the stored template test
    context are all replaced with fresh, empty containers.
    """
    global SETUP_JINJA_ENVS, EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX

    # Rebind each name to a brand new empty container.
    SETUP_JINJA_ENVS = {}
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []

    clear_test_template_context()
64
65
def get_jinja_loader(user_template_path=None):
    """
    Build the Jinja template loader, optionally letting user-supplied
    templates override the packaged ones.

    Args:
     - user_template_path: directory of overriding templates; when it is
       falsy only the templates shipped in the 'mediagoblin' package are
       used.

    (In the future we may have another system for providing theming;
    for now this is good enough.)
    """
    packaged_templates = jinja2.PackageLoader('mediagoblin', 'templates')
    if not user_template_path:
        return packaged_templates

    # User templates come first so they win over the packaged ones.
    user_templates = jinja2.FileSystemLoader(user_template_path)
    return jinja2.ChoiceLoader([user_templates, packaged_templates])
80
81
# Jinja environments that have already been built, keyed by locale.
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Set up (or fetch from the cache) the Jinja environment for a locale.

    setup_gettext(locale) is called on every invocation, cached or not,
    so the translations in mg_globals always match the requested locale.

    Args:
     - template_loader: jinja2 loader a newly built environment will use
     - locale: locale name the environment is built (and cached) for

    Returns:
      A jinja2.Environment with autoescaping on, the i18n and autoescape
      extensions loaded and the current gettext callables installed.
    """
    setup_gettext(locale)

    # If we have a jinja environment set up with this locale, just
    # return that one.
    # NOTE: the cache is keyed on locale alone, so template_loader is
    # ignored whenever a cached environment already exists.
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader, autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    template_env.install_gettext_callables(
        mg_globals.translations.gettext,
        mg_globals.translations.ngettext)

    # Only cache environments for locales babel reports as existing.
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env
111
112
# Contexts captured while unit testing, keyed by template path
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render the template at template_path with the given context.

    The request is always added to the context under the 'request' key
    (the passed-in dict is modified), so callers don't have to do it.
    While TESTS_ENABLED is set, the context is also remembered in
    TEMPLATE_TEST_CONTEXT under the template path.

    Returns whatever the template's render() call returns.
    """
    jinja_template = request.template_env.get_template(template_path)

    context['request'] = request
    output = jinja_template.render(context)

    if TESTS_ENABLED:
        TEMPLATE_TEST_CONTEXT[template_path] = context

    return output
133
134
def clear_test_template_context():
    """
    Throw away every template context captured for the unit tests by
    rebinding TEMPLATE_TEST_CONTEXT to a new empty dict.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()
138
139
def render_to_response(request, template, context):
    """
    Render a template and wrap the result in a webob Response.

    Much like Django's shortcut.render()
    """
    rendered_body = render_template(request, template, context)
    return Response(rendered_body)
143
144
def redirect(request, *args, **kwargs):
    """
    Build an HTTPFound() pointing at a generated url.

    Takes a request; every other positional and keyword argument is
    handed to request.urlgen() to produce the target location.
    """
    target_url = request.urlgen(*args, **kwargs)
    return exc.HTTPFound(location=target_url)
148
149
def setup_user_in_request(request):
    """
    Examine a request and tack on a request.user parameter if that's
    appropriate.

    request.user is set to None when the session holds no 'user_id'.
    Otherwise it is set to whatever the User lookup for that id returns;
    when that result is falsy the session is invalidated as well.
    """
    if 'user_id' not in request.session:
        request.user = None
        return

    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user
169
170
def import_component(import_string):
    """
    Import and return a single component (probably a method, class, or
    global variable) out of a module.

    Args:
     - import_string: what to import, written in the format of
       "module1.module2:component".  Only the first ':' separates the
       module path from the component name.
    """
    module_path, attr_name = import_string.split(':', 1)

    # __import__ returns the top-level package for dotted paths, so the
    # actual module is fetched from sys.modules instead.
    __import__(module_path)
    return getattr(sys.modules[module_path], attr_name)
185
# Runs of whitespace / punctuation that separate the words of a slug
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/

    The text is lowercased, split on punctuation, each piece is
    transliterated with the 'translit/long' codec, and the non-empty
    pieces are joined with delim.
    """
    pieces = [
        chunk.encode('translit/long')
        for chunk in _punct_re.split(text.lower())]
    # Drop pieces that are empty after transliteration
    return unicode(delim.join([piece for piece in pieces if piece]))
198
199 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
200 ### Special email test stuff begins HERE
201 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
202
203 # We have two "test inboxes" here:
204 #
205 # EMAIL_TEST_INBOX:
206 # ----------------
207 # If you're writing test views, you'll probably want to check this.
208 # It contains a list of MIMEText messages.
209 #
210 # EMAIL_TEST_MBOX_INBOX:
211 # ----------------------
# This collects the messages from the FakeMhost inbox.  It's really
# just here for testing the send_email method itself.
214 #
215 # Anyway this contains:
216 # - from
217 # - to: a list of email recipient addresses
218 # - message: not just the body, but the whole message, including
219 # headers, etc.
220 #
221 # ***IMPORTANT!***
222 # ----------------
223 # Before running tests that call functions which send email, you should
224 # always call _clear_test_inboxes() to "wipe" the inboxes clean.
225
EMAIL_TEST_INBOX = []
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Stand-in mail host: instead of delivering anything it records what
    send_email hands it, so the messages can be captured and tested.
    """
    def connect(self):
        """Pretend to connect; there is nothing to connect to."""
        return None

    def sendmail(self, from_addr, to_addrs, message):
        """Store the sender, recipients and message in EMAIL_TEST_MBOX_INBOX."""
        captured = {
            'from': from_addr,
            'to': to_addrs,
            'message': message}
        EMAIL_TEST_MBOX_INBOX.append(captured)
243
def _clear_test_inboxes():
    """
    Wipe both email test inboxes by rebinding them to new empty lists.
    """
    global EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []
249
250 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
251 ### </Special email test stuff>
252 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
253
def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    While TESTS_ENABLED or mg_globals.email_debug_mode is set, the message
    goes to a FakeMhost instead of a real SMTP server.  With TESTS_ENABLED
    the MIMEText message is also appended to EMAIL_TEST_INBOX, and in
    debug mode it is printed to stdout.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns:
      Whatever the mail host's sendmail() returns.
    """
    # Read the flag the same (tolerant) way everywhere in this function.
    debug_mode = getattr(mg_globals, 'email_debug_mode', False)

    if TESTS_ENABLED or debug_mode:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP()

    mhost.connect()

    try:
        message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
        message['Subject'] = subject
        message['From'] = from_addr
        message['To'] = ', '.join(to_addrs)

        if TESTS_ENABLED:
            EMAIL_TEST_INBOX.append(message)

        if debug_mode:
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and also parses on Python 3.
            print(u"===== Email =====")
            print(u"From address: %s" % message['From'])
            print(u"To addresses: %s" % message['To'])
            print(u"Subject: %s" % message['Subject'])
            print(u"-- Body: --")
            print(message.get_payload(decode=True))

        return mhost.sendmail(from_addr, to_addrs, message.as_string())
    finally:
        # Close the real SMTP connection instead of leaking it.  The fake
        # host has no quit(), hence the hasattr check.
        if hasattr(mhost, 'quit'):
            try:
                mhost.quit()
            except smtplib.SMTPException:
                # The server already dropped the connection; nothing
                # is left to clean up.
                pass
290
291
292 ###################
293 # Translation tools
294 ###################
295
296
# Filesystem path of the 'translations' resource of the mediagoblin
# package; setup_gettext() hands it to gettext.translation().
TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin', 'translations')
299
300
def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The language part is lowercased, the country part uppercased and the
    two are joined with an underscore.  A locale without a separator is
    simply lowercased ("EN" -> "en").

    Args:
     - locale: locale string such as "en-us", "en_US" or "en"
    """
    # '-' is checked first, so it wins when both separators are present.
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()
313
314
def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    Everything is lowercased and the first underscore, if any, becomes a
    dash.  Dashes already present are left alone.

    Args:
     - locale: locale string such as "en_US", "en-us" or "en"
    """
    return locale.lower().replace('_', '-', 1)
324
325
def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request

    Sources are tried in this order:
     1. a 'lang' field in the request form (GET, or POST when GET is empty)
     2. a 'locale' entry in the routing matchdict
     3. a 'target_lang' entry in the session
     4. the first of the request's accept_language best matches
     5. plain 'en'

    Returns:
      The chosen locale, normalized by locale_to_lower_upper().
    """
    request_form = request.GET or request.POST

    if 'lang' in request_form:
        return locale_to_lower_upper(request_form['lang'])

    accept_lang_matches = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    if 'locale' in request.matchdict:
        target_lang = request.matchdict['locale']
    elif 'target_lang' in request.session:
        target_lang = request.session['target_lang']
    # Pull the first acceptable language
    elif accept_lang_matches:
        target_lang = accept_lang_matches[0]
    # Fall back to English
    else:
        target_lang = 'en'

    return locale_to_lower_upper(target_lang)
351
352
# A super strict version of the lxml.html cleaner class
#
# Shared by clean_html() below.  Only the tags listed in allow_tags are
# explicitly allowed.
# NOTE(review): the exact effect of each flag is defined by
# lxml.html.clean.Cleaner -- check its documentation before changing any.
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False, # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True, # for now
    host_whitelist=(),
    whitelist_tags=set([]))
373
374
def clean_html(html):
    """
    Run html through the module-level HTML_CLEANER and return the result.
    """
    return HTML_CLEANER.clean_html(html)
377
378
# One Markdown converter shared by every conversion in this module
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')


def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through MarkDown, and clean its HTML.

    Args:
     - text: MarkDown source; any falsy value (None, '') yields u''

    Returns:
      The converted markup after it has been passed through clean_html().
    """
    # Markdown will do nothing with, and clean_html can do nothing with,
    # an empty string :)
    if not text:
        return u''

    # The converter is shared between calls.  Reset it first so state
    # left behind by the previous document (such as reference-style link
    # definitions) cannot carry over into this one.
    MARKDOWN_INSTANCE.reset()

    return clean_html(MARKDOWN_INSTANCE.convert(text))
392
393
# gettext translation objects that have already been loaded, keyed by locale
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    The translation object for the locale is loaded once and cached (only
    for locales babel reports as existing), then installed into
    mg_globals as `translations`.

    Args:
     - locale: locale name to load the 'mediagoblin' translations for
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        # fallback=True: a missing catalog does not raise here
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)
416
417
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - cursor: db cursor; must offer count(), skip() and limit()
         - per_page: number of objects per page
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        # Counted once, up front; every page calculation relies on it.
        self.total_count = self.cursor.count()

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages, rounding a partial last page up."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True when a page exists before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True when a page exists after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pagination widget.

        A number is yielded when it lies within the first `left_edge`
        pages, within the last `right_edge` pages, or in the window
        around the current page (`left_current` pages before it up to
        `right_current` - 1 pages after it).  None is yielded wherever
        numbers were skipped, so the caller can render a gap.
        """
        total_pages = self.pages
        last = 0
        for num in xrange(1, total_pages + 1):
            at_left_edge = num <= left_edge
            around_current = (
                self.page - left_current - 1 < num
                < self.page + right_current)
            at_right_edge = num > total_pages - right_edge

            if at_left_edge or around_current or at_right_edge:
                if last + 1 != num:
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        Args:
         - base_url: url the query string is appended to
         - get_params: existing GET parameters (may be None/empty); they
           are copied, never modified
         - page_no: value for the 'page' parameter
        """
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)