Adds tag unit testing
[mediagoblin.git] / mediagoblin / util.py
CommitLineData
8e1e744d 1# GNU MediaGoblin -- federated, autonomous media hosting
e5572c60
ML
2# Copyright (C) 2011 Free Software Foundation, Inc
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
4d4f6050 17from email.MIMEText import MIMEText
b77eec65
CAW
18import gettext
19import pkg_resources
4d4f6050 20import smtplib
cb8ea0fe 21import sys
0546833c 22import re
c5678c1a
CAW
23import urllib
24from math import ceil
cdf538bd 25from string import strip
c5678c1a 26import copy
909371cd 27import wtforms
c5678c1a 28
f99f61c6 29from babel.localedata import exists
31a8ff42 30import jinja2
0546833c 31import translitcodec
9150244a 32from webob import Response, exc
a68ee555 33from lxml.html.clean import Cleaner
4bf8e888 34import markdown
31a8ff42 35
6e7ce8d1 36from mediagoblin import mg_globals
22646703 37from mediagoblin import messages
c5678c1a 38from mediagoblin.db.util import ObjectId
29f3fb70 39
4d4f6050
CAW
# Module-wide switch read by the test hooks in this file (template
# context capture, fake mail host).  Off unless _activate_testing()
# has been called.
TESTS_ENABLED = False


def _activate_testing():
    """
    Turn on the module-level TESTS_ENABLED flag.

    There is no counterpart here to turn it back off.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True
47
48
66471f0e
CAW
def clear_test_buckets():
    """
    Reset everything this module accumulates for testing purposes.

    Rebinds the cached jinja environments, both email test inboxes and
    the captured template contexts to fresh empty containers, so the
    next round of tests starts from a clean slate.
    """
    global SETUP_JINJA_ENVS
    SETUP_JINJA_ENVS = {}

    # Both email inboxes are reset by the dedicated helper.
    _clear_test_inboxes()

    clear_test_template_context()
67
68
f99f61c6
CAW
# Already-built jinja2 environments, keyed by locale.
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Return the jinja2 environment to use for this locale.

    (In the future we may have another system for providing theming;
    for now this is good enough.)

    setup_gettext(locale) is always called first.  An environment
    already cached for the locale is returned as-is; note that the
    cache is keyed on the locale only, so template_loader is used only
    when a new environment has to be built.

    Args:
     - template_loader: loader handed to jinja2.Environment
     - locale: locale identifier the environment is built for
    """
    setup_gettext(locale)

    # Reuse the environment built earlier for this locale, if any.
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    env = jinja2.Environment(
        loader=template_loader,
        autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    translations = mg_globals.translations
    env.install_gettext_callables(
        translations.gettext,
        translations.ngettext)

    # Every template can fetch all waiting messages (which also removes
    # them from the queue).
    env.globals['fetch_messages'] = messages.fetch_messages

    # Only cache environments for locales babel reports as existing.
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = env

    return env
102
58dec5ef 103
e9279f21
CAW
# We'll store context information here when doing unit tests
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render the template at template_path with the given context.

    The request is always added to the context under the 'request'
    key (the passed-in dict is modified in place), so callers don't
    have to.  When TESTS_ENABLED is set, the context is also recorded
    in TEMPLATE_TEST_CONTEXT under the template path.

    Returns the rendered template.
    """
    env = request.template_env
    template = env.get_template(template_path)

    context['request'] = request
    output = template.render(context)

    if TESTS_ENABLED:
        TEMPLATE_TEST_CONTEXT[template_path] = context

    return output
124
125
def clear_test_template_context():
    """
    Replace TEMPLATE_TEST_CONTEXT with a brand new empty dict.

    The previous dict object is left untouched; only the module-level
    name is rebound.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()
129
130
1c63ad5d
E
def render_to_response(request, template, context):
    """
    Render a template and wrap the result in a Response.

    Much like Django's shortcut.render().
    """
    body = render_template(request, template, context)
    return Response(body)
134
135
9150244a
E
def redirect(request, *args, **kwargs):
    """
    Build an HTTPFound() pointing at a generated url.

    Takes a request; every other argument is passed straight through
    to request.urlgen() to produce the redirect location.
    """
    target = request.urlgen(*args, **kwargs)
    return exc.HTTPFound(location=target)
139
140
58dec5ef
CAW
def setup_user_in_request(request):
    """
    Attach a request.user attribute based on the session.

    request.user is None when the session carries no 'user_id'.
    Otherwise the user is looked up by that id; if the lookup comes
    back empty the session is invalidated, and request.user is set to
    whatever the lookup returned.
    """
    session = request.session

    if 'user_id' not in session:
        request.user = None
        return

    found = request.app.db.User.one(
        {'_id': ObjectId(session['user_id'])})

    if not found:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        session.invalidate()

    request.user = found
cb8ea0fe
CAW
160
161
def import_component(import_string):
    """
    Import and return a single component of a module.  Probably a
    method, class, or global variable.

    Args:
     - import_string: a string that defines what to import.  Written
       in the format of "module1.module2:component"; everything before
       the first colon is the module, the rest is the attribute name.
    """
    module_path, attr_name = import_string.split(':', 1)

    # __import__ returns the top-level package for dotted names, so the
    # actual module is fetched from sys.modules instead.
    __import__(module_path)
    return getattr(sys.modules[module_path], attr_name)
4d4f6050 176
0546833c
AW
# Runs of these punctuation/whitespace characters separate slug words.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug.  Taken from
    http://flask.pocoo.org/snippets/5/

    The text is lowercased and split on punctuation, each word is run
    through the 'translit/long' codec, words that come out empty are
    dropped, and the rest are joined with delim.
    """
    transliterated = [
        word.encode('translit/long')
        for word in _punct_re.split(text.lower())]
    kept = [word for word in transliterated if word]
    return unicode(delim.join(kept))
4d4f6050
CAW
189
190### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
191### Special email test stuff begins HERE
192### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
193
194# We have two "test inboxes" here:
195#
196# EMAIL_TEST_INBOX:
197# ----------------
198# If you're writing test views, you'll probably want to check this.
199# It contains a list of MIMEText messages.
200#
201# EMAIL_TEST_MBOX_INBOX:
202# ----------------------
203# This collects the messages from the FakeMhost inbox. It's really
204# just here for testing the send_email method itself.
205#
206# Anyway this contains:
207# - from
208# - to: a list of email recipient addresses
209# - message: not just the body, but the whole message, including
210# headers, etc.
211#
212# ***IMPORTANT!***
213# ----------------
214# Before running tests that call functions which send email, you should
215# always call _clear_test_inboxes() to "wipe" the inboxes clean.
216
# Messages recorded by send_email itself while tests are enabled.
EMAIL_TEST_INBOX = []
# Raw deliveries recorded by FakeMhost.sendmail.
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Just a fake mail host so we can capture and test messages
    from send_email
    """
    def connect(self):
        """Pretend to connect; does nothing."""
        return None

    def sendmail(self, from_addr, to_addrs, message):
        """
        Record the delivery in EMAIL_TEST_MBOX_INBOX instead of
        sending anything.
        """
        delivery = {
            'from': from_addr,
            'to': to_addrs,
            'message': message}
        EMAIL_TEST_MBOX_INBOX.append(delivery)
234
def _clear_test_inboxes():
    """
    Rebind both email test inboxes to fresh empty lists.
    """
    global EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []
240
241### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
242### </Special email test stuff>
243### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
244
def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    When tests are enabled or email debug mode is on, the message is
    handed to a FakeMhost (which only records it) rather than to a real
    smtplib.SMTP host.  With tests enabled the MIMEText message is also
    appended to EMAIL_TEST_INBOX, and in debug mode the message is
    printed.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns whatever the mail host's sendmail() returns.
    """
    # Read the flag once, with a default, the same way the debug
    # printout below always did; a missing attribute means "off"
    # rather than an AttributeError while picking the mail host.
    debug_mode = getattr(mg_globals, 'email_debug_mode', False)

    if TESTS_ENABLED or debug_mode:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP()

    mhost.connect()

    message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
    message['Subject'] = subject
    message['From'] = from_addr
    message['To'] = ', '.join(to_addrs)

    if TESTS_ENABLED:
        EMAIL_TEST_INBOX.append(message)

    if debug_mode:
        # Single parenthesised argument: prints the same thing whether
        # print is a statement or a function.
        print(u"===== Email =====")
        print(u"From address: %s" % message['From'])
        print(u"To addresses: %s" % message['To'])
        print(u"Subject: %s" % message['Subject'])
        print(u"-- Body: --")
        print(message.get_payload(decode=True))

    return mhost.sendmail(from_addr, to_addrs, message.as_string())
20c834ff 281
8b28bee4
CAW
282
283###################
284# Translation tools
285###################
286
287
b77eec65
CAW
# Location of the 'translations' resource of the mediagoblin package;
# handed to gettext as the locale directory.
TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin',
    'translations')
290
291
8b28bee4
CAW
def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The locale is split on its first '-' (or, when it has none, its
    first '_'); the language part is lowercased, the rest uppercased,
    and the two are joined with an underscore.  A locale with neither
    separator is simply lowercased.
    """
    # '-' is deliberately tried before '_'.
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()
304
305
def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    Only the first underscore is turned into a dash; everything is
    lowercased.
    """
    lang, separator, country = locale.partition('_')
    if not separator:
        return locale.lower()

    return '%s-%s' % (lang.lower(), country.lower())
315
316
def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request

    Order of preference:
     1. a 'lang' field in request.GET (or request.POST when GET is
        empty)
     2. a 'locale' entry in the routing matchdict
     3. a 'target_lang' entry in the session
     4. the first of the request's accepted-language matches
     5. English

    The result is always passed through locale_to_lower_upper().
    """
    form_data = request.GET or request.POST
    if 'lang' in form_data:
        return locale_to_lower_upper(form_data['lang'])

    browser_langs = request.accept_language.best_matches()

    if 'locale' in request.matchdict:
        # Your routing can explicitly specify a target language
        chosen = request.matchdict['locale']
    elif 'target_lang' in request.session:
        chosen = request.session['target_lang']
    elif browser_langs:
        # Pull the first acceptable language
        chosen = browser_langs[0]
    else:
        # Fall back to English
        chosen = 'en'

    return locale_to_lower_upper(chosen)
b77eec65
CAW
342
343
a68ee555
CAW
# A super strict version of the lxml.html cleaner class
HTML_CLEANER = Cleaner(
    # Categories of content the cleaner is asked to deal with.
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    # The only tags allowed through.
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True,  # for now
    host_whitelist=(),
    whitelist_tags=set())


def clean_html(html):
    """
    Run html through HTML_CLEANER and return the result.

    Anything falsy (such as an empty string) yields u'' without
    touching the cleaner, since clean_html barfs on an empty string.
    """
    if html:
        return HTML_CLEANER.clean_html(html)

    return u''
372
373
def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags,

    Strips leading and trailing whitespace, collapses internal runs of
    whitespace to a single space, and converts the "tags" text into a
    list of tags.

    The string is split on the configured 'tags_delimiter'.  Unless
    'tags_case_sensitive' is set, tag names are lowercased.  Empty tags
    and tags whose final name was already seen are skipped.

    Returns a list of dicts of the form {'name': ..., 'slug': ...}, in
    order of first appearance; an empty list for empty input.
    """
    taglist = []
    if not tag_string:
        return taglist

    delimiter = mg_globals.app_config['tags_delimiter']
    case_sensitive = mg_globals.app_config['tags_case_sensitive']

    # Strip out internal, trailing, and leading whitespace
    stripped_tag_string = u' '.join(tag_string.strip().split())

    # Names already emitted, in their final (possibly lowercased) form.
    seen_names = set()

    # Split the tag string into a list of tags
    for raw_tag in stripped_tag_string.split(delimiter):
        name = raw_tag.strip()
        if not case_sensitive:
            name = name.lower()

        # Ignore empty or duplicate tags.  The comparison must use the
        # same form that gets stored, otherwise "foo, Foo" would yield
        # two "foo" entries in case-insensitive mode.
        if not name or name in seen_names:
            continue

        seen_names.add(name)
        taglist.append({'name': name,
                        'slug': slugify(name)})

    return taglist
cdf538bd
CFD
401
402
0712a06d
CFD
def media_tags_as_string(media_entry_tags):
    """
    Generate a string from a media item's tags, stored as a list of dicts

    The 'name' of every tag is joined using the configured
    'tags_delimiter'.  No tags at all gives an empty string.

    This is the opposite of convert_to_tag_list_of_dicts
    """
    if not media_entry_tags:
        return ''

    names = [tag['name'] for tag in media_entry_tags]
    return mg_globals.app_config['tags_delimiter'].join(names)
414
909371cd
CFD
# Filled in with (maximum length, comma separated offending tags).
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')


def tag_length_validator(form, field):
    """
    Make sure tags do not exceed the maximum tag length.

    field.data is parsed with convert_to_tag_list_of_dicts() and every
    tag name is checked against the configured 'tags_max_length'.

    Raises wtforms.ValidationError naming the offending tags if any
    are too long.  The form argument is not used.
    """
    too_long_tags = []
    for tag in convert_to_tag_list_of_dicts(field.data):
        name = tag['name']
        if len(name) > mg_globals.app_config['tags_max_length']:
            too_long_tags.append(name)

    if not too_long_tags:
        return

    details = (
        mg_globals.app_config['tags_max_length'],
        ', '.join(too_long_tags))
    raise wtforms.ValidationError(TOO_LONG_TAG_WARNING % details)
909371cd
CFD
431
432
# Shared converter, created once with safe_mode='escape'.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')


def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through MarkDown, and clean its HTML.

    Falsy input gives u'' straight away: Markdown will do nothing
    with and clean_html can do nothing with an empty string :)
    """
    if text:
        converted = MARKDOWN_INSTANCE.convert(text)
        return clean_html(converted)

    return u''
445
446
f99f61c6
CAW
# gettext translation objects already loaded, keyed by locale.
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    The translation object is taken from SETUP_GETTEXTS when present;
    otherwise it is loaded from TRANSLATIONS_PATH (with fallback=True)
    and cached if babel reports the locale as existing.  Either way it
    is installed via mg_globals.setup_globals(translations=...).
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    try:
        translation = SETUP_GETTEXTS[locale]
    except KeyError:
        translation = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        if exists(locale):
            SETUP_GETTEXTS[locale] = translation

    mg_globals.setup_globals(translations=translation)
ae85ed0f
BK
469
470
b9e9610b
CAW
# Page size used when a Pagination is built without an explicit per_page.
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Build one with Pagination(page, cursor, per_page=...), then call
    the instance to get the cursor narrowed down to the requested page.
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - cursor: db cursor; its count() is read once, here
         - per_page: number of objects per page
        """
        self.cursor = cursor
        self.page = page
        self.per_page = per_page
        self.total_count = cursor.count()

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        offset = (self.page - 1) * self.per_page
        return self.cursor.skip(offset).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages, rounding a partial last page up."""
        return int(ceil(float(self.total_count) / self.per_page))

    @property
    def has_prev(self):
        """True when the current page is past page 1."""
        return self.page > 1

    @property
    def has_next(self):
        """True when the current page is before the last page."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pager.

        Numbers near the start, around the current page and near the
        end are yielded; each skipped stretch in between is
        represented by a single None.
        """
        previous = 0
        for num in range(1, self.pages + 1):
            near_start = num <= left_edge
            near_current = (
                self.page - left_current - 1 < num
                < self.page + right_current)
            near_end = num > self.pages - right_edge

            if not (near_start or near_current or near_end):
                continue

            if previous + 1 != num:
                # A gap was skipped since the last yielded number.
                yield None
            yield num
            previous = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        get_params is shallow-copied before 'page' is set on it, so
        the caller's mapping is left alone; a falsy get_params is
        treated as empty.
        """
        params = copy.copy(get_params or {})
        params['page'] = page_no
        query_string = urllib.urlencode(params)
        return "%s?%s" % (base_url, query_string)

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)