Adds util.cleaned_markdown_conversion() and uses it in the submission process
[mediagoblin.git] / mediagoblin / util.py
CommitLineData
8e1e744d 1# GNU MediaGoblin -- federated, autonomous media hosting
e5572c60
ML
2# Copyright (C) 2011 Free Software Foundation, Inc
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
4d4f6050 17from email.MIMEText import MIMEText
b77eec65
CAW
18import gettext
19import pkg_resources
4d4f6050 20import smtplib
cb8ea0fe 21import sys
0546833c 22import re
c5678c1a
CAW
23import urllib
24from math import ceil
25import copy
26
f99f61c6 27from babel.localedata import exists
31a8ff42 28import jinja2
0546833c 29import translitcodec
9150244a 30from webob import Response, exc
a68ee555 31from lxml.html.clean import Cleaner
4bf8e888 32import markdown
31a8ff42 33
6e7ce8d1 34from mediagoblin import mg_globals
c5678c1a 35from mediagoblin.db.util import ObjectId
29f3fb70 36
4d4f6050
CAW
TESTS_ENABLED = False


def _activate_testing():
    """
    Switch util.py into testing mode by setting the module-level
    TESTS_ENABLED flag to True.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True
44
45
66471f0e
CAW
def clear_test_buckets():
    """
    Reset everything this module stores for testing purposes so the
    next round of tests starts from a "clean slate".

    Rebinds the cached jinja environments and both test email inboxes
    to fresh empty containers, then clears the stored template
    contexts.
    """
    global SETUP_JINJA_ENVS, EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX

    SETUP_JINJA_ENVS = {}
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []

    clear_test_template_context()
64
65
def get_jinja_loader(user_template_path=None):
    """
    Build the Jinja template loader, optionally letting templates found
    under user_template_path override the ones shipped with the
    'mediagoblin' package.

    (In the future we may have another system for providing theming;
    for now this is good enough.)

    Args:
     - user_template_path: optional directory of overriding templates;
       when falsy only the packaged templates are used.
    """
    if not user_template_path:
        return jinja2.PackageLoader('mediagoblin', 'templates')

    # The user's directory is listed first so that it is consulted
    # before the packaged templates.
    user_loader = jinja2.FileSystemLoader(user_template_path)
    packaged_loader = jinja2.PackageLoader('mediagoblin', 'templates')
    return jinja2.ChoiceLoader([user_loader, packaged_loader])
31a8ff42 80
0e0e3d9a 81
f99f61c6
CAW
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Set up (or fetch from cache) the Jinja environment for a locale.

    Always calls setup_gettext(locale) first, so the translations in
    mg_globals match the requested locale even on a cache hit.

    Args:
     - template_loader: jinja2 loader used when a new environment has
       to be built
     - locale: locale identifier the environment is built for

    Returns the jinja2.Environment for this locale.

    NOTE: environments are cached by locale only; on a cache hit the
    template_loader argument is ignored.
    """
    setup_gettext(locale)

    # If we have a jinja environment set up with this locale, just
    # return that one.
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader, autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    template_env.install_gettext_callables(
        mg_globals.translations.gettext,
        mg_globals.translations.ngettext)

    # Only cache environments for locales babel knows about.
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env
111
58dec5ef 112
e9279f21
CAW
113# We'll store context information here when doing unit tests
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render the template at template_path with the given context and
    return the rendered result.

    The request is always added to the context under the 'request'
    key (the passed-in dict is modified in place).  When tests are
    enabled, the context is also remembered in TEMPLATE_TEST_CONTEXT,
    keyed by template_path.
    """
    env = request.template_env
    loaded_template = env.get_template(template_path)

    context['request'] = request
    output = loaded_template.render(context)

    if TESTS_ENABLED:
        TEMPLATE_TEST_CONTEXT[template_path] = context

    return output
133
134
def clear_test_template_context():
    """
    Forget all template contexts stored during tests by rebinding
    TEMPLATE_TEST_CONTEXT to a new empty dict.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()
138
139
1c63ad5d
E
def render_to_response(request, template, context):
    """
    Render a template with render_template() and wrap the result in a
    webob Response.  Much like Django's shortcut.render()
    """
    body = render_template(request, template, context)
    return Response(body)
143
144
9150244a
E
def redirect(request, *args, **kwargs):
    """
    Returns a HTTPFound() pointing at the url that request.urlgen
    builds from the remaining positional and keyword arguments.
    """
    target_url = request.urlgen(*args, **kwargs)
    return exc.HTTPFound(location=target_url)
148
149
58dec5ef
CAW
def setup_user_in_request(request):
    """
    Examine a request and tack on a request.user parameter if that's
    appropriate.

    request.user is set to None when the session carries no 'user_id'
    or when no user with that id is found; in the latter case the
    session is invalidated as well.
    """
    if 'user_id' not in request.session:
        request.user = None
        return

    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user
cb8ea0fe
CAW
169
170
def import_component(import_string):
    """
    Import and return a single component (probably a method, class,
    or global variable) named by import_string.

    Args:
     - import_string: a string that defines what to import.  Written
       in the format of "module1.module2:component"
    """
    pieces = import_string.split(':', 1)
    module_path, component_name = pieces

    # __import__ returns the top-level package for dotted names, so
    # the fully-qualified module is fetched from sys.modules instead.
    __import__(module_path)
    return getattr(sys.modules[module_path], component_name)
4d4f6050 185
0546833c
AW
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug.  Taken from http://flask.pocoo.org/snippets/5/

    The lowercased text is split on runs of punctuation/whitespace,
    each piece is run through the 'translit/long' codec, and the
    non-empty results are joined with delim.
    """
    transliterated = [
        piece.encode('translit/long')
        for piece in _punct_re.split(text.lower())]
    kept = [piece for piece in transliterated if piece]
    return unicode(delim.join(kept))
4d4f6050
CAW
198
199### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
200### Special email test stuff begins HERE
201### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
202
203# We have two "test inboxes" here:
204#
205# EMAIL_TEST_INBOX:
206# ----------------
207# If you're writing test views, you'll probably want to check this.
208# It contains a list of MIMEText messages.
209#
210# EMAIL_TEST_MBOX_INBOX:
211# ----------------------
212# This collects the messages from the FakeMhost inbox. It's really
213# just here for testing the send_email method itself.
214#
215# Anyway this contains:
216# - from
217# - to: a list of email recipient addresses
218# - message: not just the body, but the whole message, including
219# headers, etc.
220#
221# ***IMPORTANT!***
222# ----------------
223# Before running tests that call functions which send email, you should
224# always call _clear_test_inboxes() to "wipe" the inboxes clean.
225
EMAIL_TEST_INBOX = []
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Stand-in mail host: instead of delivering anything it records what
    send_email hands it, so messages can be captured and tested.
    """
    def connect(self):
        # Nothing to connect to.
        return None

    def sendmail(self, from_addr, to_addrs, message):
        # Record the delivery attempt in the module-level mbox inbox.
        captured = {
            'from': from_addr,
            'to': to_addrs,
            'message': message}
        EMAIL_TEST_MBOX_INBOX.append(captured)
243
def _clear_test_inboxes():
    """
    Wipe both test inboxes by rebinding them to new empty lists.
    """
    global EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []
249
250### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
251### </Special email test stuff>
252### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
253
def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    When tests are enabled or mg_globals.email_debug_mode is set, the
    message is handed to a FakeMhost instead of a real SMTP host.
    When tests are enabled the MIMEText message is also appended to
    EMAIL_TEST_INBOX, and in debug mode the message is printed.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns whatever the mail host's sendmail() returns.
    """
    # Read the flag once, tolerating globals that were never given an
    # email_debug_mode attribute, so both uses below agree.
    debug_mode = getattr(mg_globals, 'email_debug_mode', False)

    if TESTS_ENABLED or debug_mode:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP()

    mhost.connect()

    message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
    message['Subject'] = subject
    message['From'] = from_addr
    message['To'] = ', '.join(to_addrs)

    if TESTS_ENABLED:
        EMAIL_TEST_INBOX.append(message)

    if debug_mode:
        # Single-argument print(...) behaves the same as the print
        # statement under Python 2.
        print(u"===== Email =====")
        print(u"From address: %s" % message['From'])
        print(u"To addresses: %s" % message['To'])
        print(u"Subject: %s" % message['Subject'])
        print(u"-- Body: --")
        print(message.get_payload(decode=True))

    return mhost.sendmail(from_addr, to_addrs, message.as_string())
20c834ff 290
8b28bee4
CAW
291
292###################
293# Translation tools
294###################
295
296
b77eec65
CAW
# Filesystem path of the 'translations' resource inside the
# 'mediagoblin' package, as resolved by pkg_resources.
TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin', 'translations')
299
300
8b28bee4
CAW
def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The part before the first separator ('-' is looked for first,
    then '_') is lowercased and the remainder uppercased; the two are
    joined with an underscore.  A locale without a separator is
    simply lowercased.
    """
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()
313
314
def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    The whole locale is lowercased and the first underscore, if any,
    is turned into a dash.
    """
    # Only the first '_' is replaced, matching a split on the first
    # underscore followed by a '-' join.
    return locale.lower().replace('_', '-', 1)
324
325
def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request

    Sources are consulted in this order:
     1. a 'lang' parameter in the request's GET (or, if GET is empty,
        POST) data
     2. a 'locale' entry in the routing matchdict
     3. a 'target_lang' entry in the session
     4. the first match from the request's Accept-Language header
     5. English ('en') as a last resort

    Returns the chosen locale formatted by locale_to_lower_upper().
    """
    request_form = request.GET or request.POST

    if 'lang' in request_form:
        return locale_to_lower_upper(request_form['lang'])

    accept_lang_matches = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    if 'locale' in request.matchdict:
        target_lang = request.matchdict['locale']
    elif 'target_lang' in request.session:
        target_lang = request.session['target_lang']
    # Pull the first acceptable language
    elif accept_lang_matches:
        target_lang = accept_lang_matches[0]
    # Fall back to English
    else:
        target_lang = 'en'

    return locale_to_lower_upper(target_lang)
b77eec65
CAW
351
352
a68ee555
CAW
353# A super strict version of the lxml.html cleaner class
# Shared lxml.html.clean.Cleaner instance used by clean_html().
# Every removal option passed here is switched on, and allow_tags
# lists the only tags this configuration names as permitted.  See the
# lxml Cleaner documentation for the exact meaning of each option.
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False, # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True, # for now
    host_whitelist=(),
    whitelist_tags=set([]))
373
374
def clean_html(html):
    """
    Run html through the module's HTML_CLEANER and return the result.
    """
    cleaned = HTML_CLEANER.clean_html(html)
    return cleaned
377
378
4bf8e888
CAW
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')


def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through MarkDown, and clean its HTML.
    """
    # The Markdown instance is shared by every call, so reset it first
    # to keep parser state from one document out of the next one.
    MARKDOWN_INSTANCE.reset()
    return clean_html(MARKDOWN_INSTANCE.convert(text))
387
388
f99f61c6
CAW
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    Looks up (or creates and, for locales babel knows about, caches)
    the 'mediagoblin' translation object for locale and installs it
    as mg_globals.translations via mg_globals.setup_globals().
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)
ae85ed0f
BK
411
412
b9e9610b
CAW
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE):
        """
        Initializes Pagination

        Args:
         - page: requested page (pages are numbered starting at 1)
         - cursor: db cursor; must provide count(), skip() and limit()
         - per_page: number of objects per page
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        # Counted once, at construction time.
        self.total_count = self.cursor.count()

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages, rounding a partial last page up."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True if there is a page before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True if there is a page after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pagination widget.

        Numbers near the start (left_edge), near the end (right_edge)
        and around the current page (left_current / right_current) are
        yielded; each skipped run of pages is represented by a single
        None.
        """
        total_pages = self.pages
        last = 0
        for num in xrange(1, total_pages + 1):
            near_current = (
                self.page - left_current - 1 < num
                < self.page + right_current)
            if (num <= left_edge or near_current
                    or num > total_pages - right_edge):
                if last + 1 != num:
                    # A gap since the last yielded page number.
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        Args:
         - base_url: url the query string is appended to
         - get_params: existing GET parameters (may be None/empty);
           a shallow copy is modified, not the object passed in
         - page_no: page number to put in the 'page' parameter
        """
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)