Merge remote branch 'remotes/gullydwarf-cfdv/437_slug_shouldnt_be_empty'
[mediagoblin.git] / mediagoblin / util.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011 Free Software Foundation, Inc
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 from email.MIMEText import MIMEText
18 import gettext
19 import pkg_resources
20 import smtplib
21 import sys
22 import re
23 import urllib
24 from math import ceil
25 import copy
26
27 from babel.localedata import exists
28 import jinja2
29 import translitcodec
30 from webob import Response, exc
31 from lxml.html.clean import Cleaner
32 import markdown
33
34 from mediagoblin import mg_globals
35 from mediagoblin import messages
36 from mediagoblin.db.util import ObjectId
37
# Module-wide switch; the rendering and email helpers in this file
# record extra information for unit tests while it is True.
TESTS_ENABLED = False


def _activate_testing():
    """
    Call this to activate testing in util.py

    Flips the module-level TESTS_ENABLED flag to True.  Nothing in
    this function turns it back off again.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True
45
46
def clear_test_buckets():
    """
    Give the next round of tests a "clean slate".

    Rebinds the module-level test stores to fresh, empty containers:
     - SETUP_JINJA_ENVS (the cached jinja environments, which tests
       may have redefined)
     - EMAIL_TEST_INBOX and EMAIL_TEST_MBOX_INBOX
    and then clears the recorded template contexts through
    clear_test_template_context().
    """
    global SETUP_JINJA_ENVS, EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX

    SETUP_JINJA_ENVS = {}
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []

    clear_test_template_context()
65
66
# Jinja environments that have already been built, keyed by locale.
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Set up the Jinja environment,

    (In the future we may have another system for providing theming;
    for now this is good enough.)

    Args:
     - template_loader: the jinja2 loader given to a newly built
       environment
     - locale: locale name used for gettext and as the cache key

    Returns a jinja2.Environment with autoescaping on, the i18n
    callables installed and a 'fetch_messages' template global.
    """
    # Done on every call, cache hit or not, so the translations in
    # mg_globals always match the locale being asked for.
    setup_gettext(locale)

    # If we have a jinja environment set up with this locale, just
    # return that one.
    # NOTE(review): the cache is keyed on locale alone, so a cached
    # environment is returned even if template_loader differs from the
    # one it was built with -- confirm callers always pass the same loader.
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader, autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    template_env.install_gettext_callables(
        mg_globals.translations.gettext,
        mg_globals.translations.ngettext)

    # All templates will know how to ...
    # ... fetch all waiting messages and remove them from the queue
    template_env.globals['fetch_messages'] = messages.fetch_messages

    # Only locales babel knows about are cached; anything else gets a
    # freshly built environment on every call.
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env
100
101
# While unit tests are running, the context used for each rendered
# template is kept here, keyed by template path.
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render the template at template_path with context and return the
    rendered result.

    The request is always added to the context under the 'request'
    key (the dict passed in is modified), so callers don't have to do
    it.  When TESTS_ENABLED is set, the context is also remembered in
    TEMPLATE_TEST_CONTEXT under template_path.
    """
    template = request.template_env.get_template(template_path)
    context['request'] = request
    output = template.render(context)

    if TESTS_ENABLED:
        TEMPLATE_TEST_CONTEXT[template_path] = context

    return output
122
123
def clear_test_template_context():
    """
    Forget every template context recorded for unit tests.

    TEMPLATE_TEST_CONTEXT is rebound to a new empty dict; the old dict
    object itself is left untouched.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()
127
128
def render_to_response(request, template, context):
    """
    Much like Django's shortcut.render()

    Renders template with context via render_template() and wraps the
    result in a webob Response.
    """
    body = render_template(request, template, context)
    return Response(body)
132
133
def redirect(request, *args, **kwargs):
    """
    Returns a HTTPFound(), takes a request and then urlgen params

    All positional and keyword arguments after the request are passed
    straight to request.urlgen() to build the redirect location.
    """
    target = request.urlgen(*args, **kwargs)
    return exc.HTTPFound(location=target)
137
138
def setup_user_in_request(request):
    """
    Examine a request and tack on a request.user parameter if that's
    appropriate.

    - No 'user_id' in the session: request.user is set to None.
    - Otherwise the user is looked up by that id and stored on
      request.user.  If the lookup finds nothing, the session is
      invalidated and request.user ends up holding that empty result.
    """
    if 'user_id' not in request.session:
        request.user = None
        return

    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user
158
159
def import_component(import_string):
    """
    Import a module component defined by STRING.  Probably a method,
    class, or global variable.

    Args:
     - import_string: a string that defines what to import.  Written
       in the format of "module1.module2:component"

    Returns the attribute named after the first ':' from the module
    named before it.  A string without a ':' raises ValueError from
    the unpacking below.
    """
    module_path, attr_name = import_string.split(':', 1)
    # __import__ returns the top-level package for dotted names, so
    # the module itself is fetched from sys.modules afterwards.
    __import__(module_path)
    return getattr(sys.modules[module_path], attr_name)
174
# Runs of these characters separate the words of a slug.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/

    The text is lowercased and split on _punct_re, each piece is run
    through the 'translit/long' codec, and the non-empty results are
    joined with delim.  The result can be an empty string when no
    piece survives.
    """
    pieces = [
        chunk.encode('translit/long')
        for chunk in _punct_re.split(text.lower())]
    return unicode(delim.join([piece for piece in pieces if piece]))
187
188 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
189 ### Special email test stuff begins HERE
190 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
191
192 # We have two "test inboxes" here:
193 #
194 # EMAIL_TEST_INBOX:
195 # ----------------
196 # If you're writing test views, you'll probably want to check this.
197 # It contains a list of MIMEText messages.
198 #
199 # EMAIL_TEST_MBOX_INBOX:
200 # ----------------------
201 # This collects the messages from the FakeMhost inbox. It's really
202 # just here for testing the send_email method itself.
203 #
204 # Anyway this contains:
205 # - from
206 # - to: a list of email recipient addresses
207 # - message: not just the body, but the whole message, including
208 # headers, etc.
209 #
210 # ***IMPORTANT!***
211 # ----------------
212 # Before running tests that call functions which send email, you should
213 # always call _clear_test_inboxes() to "wipe" the inboxes clean.
214
EMAIL_TEST_INBOX = []
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Just a fake mail host so we can capture and test messages
    from send_email
    """

    def connect(self):
        """Do nothing; there is no real server to connect to."""
        pass

    def sendmail(self, from_addr, to_addrs, message):
        """
        Append the message to EMAIL_TEST_MBOX_INBOX as a dict with
        'from', 'to' and 'message' keys instead of sending anything.
        """
        record = {
            'from': from_addr,
            'to': to_addrs,
            'message': message}
        EMAIL_TEST_MBOX_INBOX.append(record)
232
def _clear_test_inboxes():
    """
    Wipe both test inboxes by rebinding EMAIL_TEST_INBOX and
    EMAIL_TEST_MBOX_INBOX to new empty lists.
    """
    global EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []
238
239 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
240 ### </Special email test stuff>
241 ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
242
def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns whatever the mail host's sendmail() returns.

    When TESTS_ENABLED or the email debug mode is on, a FakeMhost is
    used and nothing leaves the machine; otherwise the message goes
    out through smtplib.SMTP() connected with its defaults.
    """
    # Read the flag once, with a default, so a missing setting means
    # "debug off" everywhere in this function.
    debug_mode = getattr(mg_globals, 'email_debug_mode', False)

    if TESTS_ENABLED or debug_mode:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP()

    mhost.connect()

    message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
    message['Subject'] = subject
    message['From'] = from_addr
    message['To'] = ', '.join(to_addrs)

    if TESTS_ENABLED:
        EMAIL_TEST_INBOX.append(message)

    if debug_mode:
        # Parenthesised single-argument form: prints the same text
        # under the Python 2 print statement and the print() function.
        print(u"===== Email =====")
        print(u"From address: %s" % message['From'])
        print(u"To addresses: %s" % message['To'])
        print(u"Subject: %s" % message['Subject'])
        print(u"-- Body: --")
        print(message.get_payload(decode=True))

    return mhost.sendmail(from_addr, to_addrs, message.as_string())
279
280
281 ###################
282 # Translation tools
283 ###################
284
285
# Location of the 'translations' resource inside the mediagoblin
# package; setup_gettext() passes it to gettext as the locale directory.
TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin',
    'translations')
288
289
def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The locale is split once, on the first '-' if it contains one and
    otherwise on the first '_'.  The part before the separator is
    lowercased, the rest uppercased, and the two are joined with '_'.
    A locale with neither separator is simply lowercased.
    """
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()
302
303
def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    Everything is lowercased and the first '_' (if any) becomes a
    '-'.  Any further '_' characters are left as they are.
    """
    return locale.lower().replace('_', '-', 1)
313
314
def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request

    Sources are tried in this order, first hit wins:
     1. a 'lang' parameter in request.GET (or in request.POST when
        GET is empty)
     2. a 'locale' entry in the routing matchdict
     3. a 'target_lang' entry in the session
     4. the first of the request's Accept-Language best matches
     5. English ('en')

    The result is always normalised with locale_to_lower_upper().
    """
    request_form = request.GET or request.POST

    if 'lang' in request_form:
        return locale_to_lower_upper(request_form['lang'])

    accept_lang_matches = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    if 'locale' in request.matchdict:
        target_lang = request.matchdict['locale']
    elif 'target_lang' in request.session:
        target_lang = request.session['target_lang']
    # Pull the first acceptable language
    elif accept_lang_matches:
        target_lang = accept_lang_matches[0]
    # Fall back to English
    else:
        target_lang = 'en'

    return locale_to_lower_upper(target_lang)
340
341
# A super strict version of the lxml.html cleaner class
HTML_CLEANER = Cleaner(
    # Kinds of content to strip
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    # The only tags that are let through
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong',
        'p', 'ul', 'ol', 'li', 'a', 'br',
    ],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True,  # for now
    host_whitelist=(),
    whitelist_tags=set())
362
363
def clean_html(html):
    """
    Run html through HTML_CLEANER and return the cleaned markup.

    Any falsy input (empty string, None) short-circuits to u''.
    """
    # clean_html barfs on an empty string
    return HTML_CLEANER.clean_html(html) if html else u''
370
371
# Shared converter used by cleaned_markdown_conversion(); it is created
# with safe_mode='escape'.
MARKDOWN_INSTANCE = markdown.Markdown(
    safe_mode='escape')
373
374
def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through MarkDown, and clean its HTML.

    Falsy input (empty string, None) is returned as u'' without
    touching either Markdown or the cleaner.
    """
    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string :)
    if text:
        converted = MARKDOWN_INSTANCE.convert(text)
        return clean_html(converted)

    return u''
385
386
# gettext translation objects that have already been loaded, keyed by
# locale.
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    The translations for the 'mediagoblin' domain are taken from the
    SETUP_GETTEXTS cache when present, otherwise loaded from
    TRANSLATIONS_PATH (with fallback=True, so a missing catalog does
    not raise).  Either way they are then handed to
    mg_globals.setup_globals(translations=...).
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        # Only locales babel knows about are cached.
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)
409
410
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - cursor: db cursor
         - per_page: number of objects per page

        The total number of objects is read once, here, from
        cursor.count().
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        offset = (self.page - 1) * self.per_page
        return self.cursor.skip(offset).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages, counting a partly filled last page."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True when the requested page is not the first one."""
        return self.page > 1

    @property
    def has_next(self):
        """True when there is at least one page after the requested one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pager.

        Pages are yielded in ascending order when they are among the
        first left_edge pages, among the last right_edge pages, or
        inside the window around the current page (left_current pages
        before it up to right_current - 1 pages after it).  A None is
        yielded wherever page numbers were skipped.
        """
        total_pages = self.pages
        last = 0
        for num in range(1, total_pages + 1):
            at_left_edge = num <= left_edge
            around_current = (
                self.page - left_current - 1 < num
                < self.page + right_current)
            at_right_edge = num > total_pages - right_edge

            if at_left_edge or around_current or at_right_edge:
                if last + 1 != num:
                    # Gap between the previous shown page and this one
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        get_params is shallow-copied first, so the caller's object is
        not modified; a falsy get_params is treated as no parameters.
        """
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)