[mediagoblin.git] / mediagoblin / util.py

# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 Free Software Foundation, Inc
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division

from email.MIMEText import MIMEText
import gettext
import pkg_resources
import smtplib
import sys
import re
import urllib
from math import ceil, floor
import copy
import wtforms

from babel.localedata import exists
from babel.support import LazyProxy
import jinja2
import translitcodec
from webob import Response, exc
from lxml.html.clean import Cleaner
import markdown
from wtforms.form import Form

from mediagoblin import mg_globals
from mediagoblin import messages
from mediagoblin.db.util import ObjectId

from itertools import izip, count

# Image size names, listed in priority order.
# NOTE(review): presumably the order in which stored versions of an image
# are tried when one has to be displayed; nothing in this module reads
# it -- confirm against callers.
DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']

# Module-wide switch read by render_template() and send_email(); it stays
# False until _activate_testing() flips it.
TESTS_ENABLED = False


def _activate_testing():
    """
    Switch this module into testing mode by setting TESTS_ENABLED to True.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True


def clear_test_buckets():
    """
    Reset the module-level state that accumulates during a test run.

    Rebinds the cached jinja environments and both email test inboxes
    to fresh empty containers, then clears the stored template
    contexts, so the next round of tests starts from a clean slate.
    """
    global SETUP_JINJA_ENVS, EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX

    SETUP_JINJA_ENVS = dict()
    EMAIL_TEST_INBOX = list()
    EMAIL_TEST_MBOX_INBOX = list()

    clear_test_template_context()


# Cache of jinja2 environments that have already been built, keyed by locale.
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Set up the Jinja environment for a locale, reusing a cached one
    when it exists.

    (In the future we may have another system for providing theming;
    for now this is good enough.)

    Args:
     - template_loader: jinja2 loader handed to a newly built environment
     - locale: locale name; selects the gettext translations and is
       the key under which the environment is cached

    Returns:
      A jinja2.Environment with the gettext callables and the
      'fetch_messages' global installed.
    """
    # This runs even on a cache hit: setup_gettext() re-registers the
    # translations for this locale with mg_globals.
    setup_gettext(locale)

    # If we have a jinja environment set up with this locale, just
    # return that one.
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader, autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    template_env.install_gettext_callables(
        mg_globals.translations.ugettext,
        mg_globals.translations.ungettext)

    # All templates will know how to ...
    # ... fetch all waiting messages and remove them from the queue
    template_env.globals['fetch_messages'] = messages.fetch_messages

    # Only locales babel has data for are cached -- presumably so that
    # arbitrary locale strings cannot grow the cache without bound.
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env


# Contexts handed to render_template() are recorded here, keyed by template
# path, while TESTS_ENABLED is on.
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render the template at `template_path` with `context` and return
    the rendered output.

    The request is always added to the context under the 'request'
    key (the dict passed in is modified in place), so callers don't
    have to do it.  While TESTS_ENABLED is on, the context is also
    remembered in TEMPLATE_TEST_CONTEXT under the template path.
    """
    env = request.template_env
    template = env.get_template(template_path)

    context['request'] = request
    output = template.render(context)

    if not TESTS_ENABLED:
        return output

    TEMPLATE_TEST_CONTEXT[template_path] = context
    return output


def clear_test_template_context():
    """
    Throw away every template context recorded by render_template().

    TEMPLATE_TEST_CONTEXT is rebound to a brand new dict rather than
    being emptied in place.
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()


def render_to_response(request, template, context):
    """
    Render `template` with `context` via render_template() and wrap
    the output in a webob Response.

    Much like Django's shortcut.render()
    """
    body = render_template(request, template, context)
    return Response(body)


def redirect(request, *args, **kwargs):
    """
    Returns a HTTPFound(), takes a request and then urlgen params

    Args:
     - request: the current request; its urlgen() builds the target url
     - *args, **kwargs: passed straight through to request.urlgen()
     - querystring: optional keyword argument; a string appended
       verbatim to the generated url.  It is never forwarded to
       urlgen().

    Returns:
      An exc.HTTPFound whose location is the generated url followed
      by the querystring (if any).
    """
    # pop() removes 'querystring' even when it is '' or None, so an
    # empty value can no longer leak through to urlgen() as if it were
    # a routing argument.
    querystring = kwargs.pop('querystring', None) or ''

    return exc.HTTPFound(
        location=''.join([
                request.urlgen(*args, **kwargs),
                querystring]))


def setup_user_in_request(request):
    """
    Examine a request and tack on a request.user attribute if that's
    appropriate.

    When the session carries no 'user_id', request.user is set to None.
    Otherwise the User whose _id matches the session's 'user_id' is
    looked up and request.user is set to the result of that lookup; if
    the lookup comes back falsy the session is invalidated as well.
    """
    if 'user_id' not in request.session:
        request.user = None
        return

    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user


def import_component(import_string):
    """
    Resolve an import string to the object it names.  Probably a
    method, class, or global variable.

    Args:
     - import_string: what to import, written in the format
       "module1.module2:component".  Only the first ':' separates
       the module path from the component name.

    Returns:
      The attribute named `component` of the imported module.
    """
    module_path, attr_name = import_string.split(':', 1)

    # __import__ returns the top-level package for dotted paths, so the
    # leaf module is fetched from sys.modules instead.
    __import__(module_path)
    return getattr(sys.modules[module_path], attr_name)

# Matches a run of tabs, spaces and punctuation; slugify() splits on it.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/

    The lowercased text is split on runs of punctuation, each piece is
    passed through the 'translit/long' codec, empty pieces are dropped
    and the rest are joined with `delim`.
    """
    transliterated = (
        chunk.encode('translit/long')
        for chunk in _punct_re.split(text.lower()))
    return unicode(delim.join(piece for piece in transliterated if piece))

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### Special email test stuff begins HERE
### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# We have two "test inboxes" here:
# 
# EMAIL_TEST_INBOX:
# ----------------
#   If you're writing test views, you'll probably want to check this.
#   It contains a list of MIMEText messages.
#
# EMAIL_TEST_MBOX_INBOX:
# ----------------------
#   This collects the messages from the FakeMhost inbox.  It's really
#   just here for testing the send_email method itself.
#
#   Anyway this contains:
#    - from
#    - to: a list of email recipient addresses
#    - message: not just the body, but the whole message, including
#      headers, etc.
#
# ***IMPORTANT!***
# ----------------
# Before running tests that call functions which send email, you should
# always call _clear_test_inboxes() to "wipe" the inboxes clean. 

# MIMEText messages captured by send_email() while testing is enabled.
EMAIL_TEST_INBOX = []
# Raw from/to/message records captured by FakeMhost.sendmail().
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Stand-in mail host: instead of delivering anything it records what
    send_email hands to it, so the messages can be inspected in tests.
    """
    def connect(self):
        """Nothing to connect to; does nothing."""

    def sendmail(self, from_addr, to_addrs, message):
        """
        Record one outgoing message in EMAIL_TEST_MBOX_INBOX as a dict
        with the keys 'from', 'to' and 'message'.
        """
        record = {
            'from': from_addr,
            'to': to_addrs,
            'message': message,
        }
        EMAIL_TEST_MBOX_INBOX.append(record)

def _clear_test_inboxes():
    """
    Wipe both email test inboxes by rebinding them to new empty lists.
    """
    global EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### </Special email test stuff>
### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    A FakeMhost is used instead of a real SMTP connection whenever
    TESTS_ENABLED is set or the 'email_debug_mode' config option is on.
    In debug mode the message is also printed to stdout.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns:
      Whatever the mail host's sendmail() returns.
    """
    debug_mode = mg_globals.app_config['email_debug_mode']
    use_fake_host = TESTS_ENABLED or debug_mode

    if use_fake_host:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP()

    mhost.connect()

    try:
        message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
        message['Subject'] = subject
        message['From'] = from_addr
        message['To'] = ', '.join(to_addrs)

        if TESTS_ENABLED:
            EMAIL_TEST_INBOX.append(message)

        if debug_mode:
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and also parses on Python 3.
            print(u"===== Email =====")
            print(u"From address: %s" % message['From'])
            print(u"To addresses: %s" % message['To'])
            print(u"Subject: %s" % message['Subject'])
            print(u"-- Body: --")
            print(message.get_payload(decode=True))

        return mhost.sendmail(from_addr, to_addrs, message.as_string())
    finally:
        # A real SMTP connection has to be shut down explicitly,
        # otherwise every call leaves an open socket behind.  FakeMhost
        # has nothing to close.
        if not use_fake_host:
            try:
                mhost.quit()
            except (smtplib.SMTPException, IOError):
                # The server already dropped the connection; just make
                # sure the socket is released and let any error from
                # the send itself propagate.
                mhost.close()


###################
# Translation tools
###################


# Filesystem path of the 'i18n' resource inside the 'mediagoblin' package,
# as resolved by pkg_resources; setup_gettext() passes it to
# gettext.translation() as the directory to search for catalogs.
TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin', 'i18n')


def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The language part is lowercased and the country part uppercased,
    joined by an underscore.  Either "-" or "_" is accepted as the
    separator in the input; a locale without a separator is simply
    lowercased.
    """
    # '-' is tried first, which keeps the existing precedence for a
    # locale that contains both separators.
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()


def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    Everything is lowercased and the first underscore, if any, becomes
    a hyphen.
    """
    # Only the first '_' is replaced, matching a split on the first
    # underscore; an already hyphenated locale just gets lowercased.
    return locale.lower().replace('_', '-', 1)


def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request

    Sources are consulted in this order and the first hit wins:
     1. a 'lang' field in the GET parameters (or in the POST
        parameters when GET is empty)
     2. a 'locale' entry in the routing matchdict
     3. 'target_lang' stored in the session
     4. the first of the request's Accept-Language best matches
     5. 'en' as the final fallback

    Returns:
      The chosen locale as formatted by locale_to_lower_upper().
    """
    request_form = request.GET or request.POST

    if 'lang' in request_form:
        return locale_to_lower_upper(request_form['lang'])

    accept_lang_matches = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    if 'locale' in request.matchdict:
        target_lang = request.matchdict['locale']
    elif 'target_lang' in request.session:
        target_lang = request.session['target_lang']
    # Pull the first acceptable language
    elif accept_lang_matches:
        target_lang = accept_lang_matches[0]
    # Fall back to English
    else:
        target_lang = 'en'

    return locale_to_lower_upper(target_lang)


# A super strict version of the lxml.html cleaner class.
#
# All of the boolean filters listed here are switched on and only the
# short list of tags in allow_tags is permitted.  The instance is built
# once at import time and reused by clean_html().
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False, # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True, # for now
    host_whitelist=(),
    whitelist_tags=set([]))


def clean_html(html):
    """
    Run `html` through HTML_CLEANER and return the cleaned result.

    Falsy input (such as an empty string or None) is never handed to
    the cleaner; u'' is returned for it instead.
    """
    # the cleaner barfs on an empty string, hence the guard
    return HTML_CLEANER.clean_html(html) if html else u''


def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags,

    Strips trailing, leading, and internal whitespace, and also converts
    the "tags" text into an array of tags

    Args:
     - tag_string: the raw tags text, with tags separated by the
       configured 'tags_delimiter'.  May be empty or None.

    Returns:
      A list of {'name': ..., 'slug': ...} dicts, one per distinct
      non-empty tag, in order of first appearance.  Empty input gives
      an empty list.
    """
    taglist = []
    if not tag_string:
        return taglist

    # Strip trailing and leading whitespace, and collapse each internal
    # run of whitespace to a single space
    stripped_tag_string = u' '.join(tag_string.strip().split())

    # Names already emitted; a set keeps the duplicate check constant
    # time instead of rescanning taglist for every tag.
    seen_names = set()

    # Split the tag string into a list of tags
    for raw_tag in stripped_tag_string.split(
            mg_globals.app_config['tags_delimiter']):
        name = raw_tag.strip()

        # Ignore empty or duplicate tags
        if not name or name in seen_names:
            continue

        seen_names.add(name)
        taglist.append({'name': name,
                        'slug': slugify(name)})

    return taglist


def media_tags_as_string(media_entry_tags):
    """
    Join the names of a media item's tags (a list of dicts, each with
    a 'name' key) into one string using the configured
    'tags_delimiter'.  No tags at all gives ''.

    This is the opposite of convert_to_tag_list_of_dicts
    """
    if not media_entry_tags:
        return ''

    delimiter = mg_globals.app_config['tags_delimiter']
    return delimiter.join(entry['name'] for entry in media_entry_tags)

# Error text for tag_length_validator(); filled with the configured maximum
# length and the comma-separated offending tags.
TOO_LONG_TAG_WARNING = \
    u'Tags must be shorter than %s characters.  Tags that are too long: %s'


def tag_length_validator(form, field):
    """
    Form field validator: reject tags longer than the configured
    'tags_max_length'.

    Raises:
      wtforms.ValidationError naming every tag that is too long.
    """
    oversized = []
    for entry in convert_to_tag_list_of_dicts(field.data):
        name = entry['name']
        if len(name) > mg_globals.app_config['tags_max_length']:
            oversized.append(name)

    if not oversized:
        return

    limit = mg_globals.app_config['tags_max_length']
    raise wtforms.ValidationError(
        TOO_LONG_TAG_WARNING % (limit, ', '.join(oversized)))


# Shared Markdown converter used by cleaned_markdown_conversion(), built
# once at import time with safe_mode='escape'.
# NOTE(review): presumably this makes raw HTML in the input get escaped
# instead of passed through -- confirm against the installed markdown version.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')

def cleaned_markdown_conversion(text):
    """
    Convert `text` with MARKDOWN_INSTANCE and pass the resulting HTML
    through clean_html().

    Falsy input yields u'' without touching either the converter or
    the cleaner.
    """
    if text:
        converted = MARKDOWN_INSTANCE.convert(text)
        return clean_html(converted)

    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string :)
    return u''


# Cache of gettext translation objects that have been loaded, keyed by locale.
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    The translation object for `locale` is taken from SETUP_GETTEXTS
    when present, otherwise loaded from TRANSLATIONS_PATH (with
    fallback=True, so a missing catalog does not raise).  Either way
    it is then registered through mg_globals.setup_globals().
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        # Only locales babel has data for are cached.
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)


# Force en to be setup before anything else so that
# mg_globals.translations is never None.  Note that this call runs at
# import time, as a side effect of importing this module.
setup_gettext('en')


def pass_to_ugettext(*args, **kwargs):
    """
    Forward the call to ugettext on whatever translations object
    mg_globals currently holds, and return its result.

    The lookup happens on every call instead of binding ugettext
    globally, because mg_globals gets swapped out by the application
    per-request.
    """
    translate = mg_globals.translations.ugettext
    return translate(*args, **kwargs)


def lazy_pass_to_ugettext(*args, **kwargs):
    """
    Return a LazyProxy wrapping pass_to_ugettext with these arguments.

    Useful for translations that have to be defined at module level
    but must not actually be translated until the moment they are
    used as a string.
    """
    proxy = LazyProxy(pass_to_ugettext, *args, **kwargs)
    return proxy


def pass_to_ngettext(*args, **kwargs):
    """
    Forward the call to ngettext on whatever translations object
    mg_globals currently holds, and return its result.

    The lookup happens on every call instead of binding ngettext
    globally, because mg_globals gets swapped out by the application
    per-request.
    """
    # NOTE(review): this calls ngettext while pass_to_ugettext and
    # get_jinja_env use the unicode variants (ugettext / ungettext) --
    # confirm whether ungettext was intended here.
    translate_plural = mg_globals.translations.ngettext
    return translate_plural(*args, **kwargs)


def lazy_pass_to_ngettext(*args, **kwargs):
    """
    Return a LazyProxy wrapping pass_to_ngettext with these arguments.

    Useful for translations that have to be defined at module level
    but must not actually be translated until the moment they are
    used as a string.
    """
    proxy = LazyProxy(pass_to_ngettext, *args, **kwargs)
    return proxy


def fake_ugettext_passthrough(string):
    """
    Return `string` untouched -- a fake ugettext call for extraction's
    sake ;)

    wtforms has its own way of hooking in a translation method, so
    text for it only needs to be marked up so that it can be
    extracted; it must not actually be run through gettext here.
    """
    return string


# Number of objects per page when Pagination is not told otherwise.
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through __init__(self, page, cursor,
    per_page=PAGINATION_DEFAULT_PER_PAGE, jump_to_id=False),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
                 jump_to_id=False):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - per_page: number of objects per page
         - cursor: db cursor
         - jump_to_id: ObjectId, sets the page to the page containing the object
           with _id == jump_to_id.  If no such object is found, `page`
           is left as requested and active_id stays None.
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()
        self.active_id = None

        if jump_to_id:
            # Search through a copy -- presumably so that the cursor later
            # handed out by __call__() is not consumed by this scan.
            cursor = copy.copy(self.cursor)

            for position, doc in enumerate(cursor):
                if doc['_id'] == jump_to_id:
                    # position is 0-based, pages are 1-based
                    self.page = 1 + int(position // self.per_page)

                    self.active_id = jump_to_id
                    break

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages (total_count / per_page, rounded up)."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True when there is a page before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True when there is a page after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pager, with None standing
        in for each run of skipped pages.

        A page number is yielded when it is among the first `left_edge`
        pages, among the last `right_edge` pages, or within the window
        from `left_current` pages before the current page up to (but
        not including) `right_current` pages after it.
        """
        last = 0
        for num in xrange(1, self.pages + 1):
            if num <= left_edge or \
               (num > self.page - left_current - 1 and \
                num < self.page + right_current) or \
               num > self.pages - right_edge:
                # A jump in the numbering means pages were skipped.
                if last + 1 != num:
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        `get_params` is copied before 'page' is set, so the caller's
        mapping is not modified; None is treated as no parameters.
        """
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)
Commit	Line	Data
8e1e744d	1	# GNU MediaGoblin -- federated, autonomous media hosting
e5572c60 ML	2	# Copyright (C) 2011 Free Software Foundation, Inc
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU Affero General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU Affero General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU Affero General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
af2fcba5 JW	17	from __future__ import division
af2fcba5 JW	18
4d4f6050	19	from email.MIMEText import MIMEText
b77eec65 CAW	20	import gettext
b77eec65 CAW	21	import pkg_resources
4d4f6050	22	import smtplib
cb8ea0fe	23	import sys
0546833c	24	import re
c5678c1a	25	import urllib
af2fcba5	26	from math import ceil, floor
c5678c1a	27	import copy
909371cd	28	import wtforms
c5678c1a	29
f99f61c6	30	from babel.localedata import exists
1c266dc3	31	from babel.support import LazyProxy
31a8ff42	32	import jinja2
0546833c	33	import translitcodec
9150244a	34	from webob import Response, exc
a68ee555	35	from lxml.html.clean import Cleaner
4bf8e888	36	import markdown
1c266dc3	37	from wtforms.form import Form
31a8ff42	38
6e7ce8d1	39	from mediagoblin import mg_globals
22646703	40	from mediagoblin import messages
c5678c1a	41	from mediagoblin.db.util import ObjectId
29f3fb70	42
af2fcba5 JW	43	from itertools import izip, count
af2fcba5 JW	44
2c9e635a JW	45	DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']
2c9e635a JW	46
4d4f6050 CAW	47	TESTS_ENABLED = False
	48	def _activate_testing():
	49	"""
	50	Call this to activate testing in util.py
	51	"""
	52	global TESTS_ENABLED
	53	TESTS_ENABLED = True
	54
	55
66471f0e CAW	56	def clear_test_buckets():
	57	"""
	58	We store some things for testing purposes that should be cleared
	59	when we want a "clean slate" of information for our next round of
	60	tests. Call this function to wipe all that stuff clean.
	61
	62	Also wipes out some other things we might redefine during testing,
	63	like the jinja envs.
	64	"""
	65	global SETUP_JINJA_ENVS
	66	SETUP_JINJA_ENVS = {}
	67
	68	global EMAIL_TEST_INBOX
	69	global EMAIL_TEST_MBOX_INBOX
	70	EMAIL_TEST_INBOX = []
	71	EMAIL_TEST_MBOX_INBOX = []
	72
	73	clear_test_template_context()
	74
	75
f99f61c6 CAW	76	SETUP_JINJA_ENVS = {}
	77
	78
0e0e3d9a CAW	79	def get_jinja_env(template_loader, locale):
	80	"""
	81	Set up the Jinja environment,
	82
	83	(In the future we may have another system for providing theming;
	84	for now this is good enough.)
	85	"""
b77eec65 CAW	86	setup_gettext(locale)
b77eec65 CAW	87
f99f61c6 CAW	88	# If we have a jinja environment set up with this locale, just
	89	# return that one.
	90	if SETUP_JINJA_ENVS.has_key(locale):
	91	return SETUP_JINJA_ENVS[locale]
	92
b77eec65	93	template_env = jinja2.Environment(
0e0e3d9a	94	loader=template_loader, autoescape=True,
44e2da2f	95	extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])
58dec5ef	96
b77eec65	97	template_env.install_gettext_callables(
84f27964	98	mg_globals.translations.ugettext,
1c266dc3	99	mg_globals.translations.ungettext)
b77eec65	100
22646703	101	# All templates will know how to ...
22646703 CFD	102	# ... fetch all waiting messages and remove them from the queue
	103	template_env.globals['fetch_messages'] = messages.fetch_messages
	104
f99f61c6 CAW	105	if exists(locale):
	106	SETUP_JINJA_ENVS[locale] = template_env
	107
b77eec65 CAW	108	return template_env
b77eec65 CAW	109
58dec5ef	110
e9279f21 CAW	111	# We'll store context information here when doing unit tests
	112	TEMPLATE_TEST_CONTEXT = {}
	113
	114
67e8c45d	115	def render_template(request, template_path, context):
e9279f21 CAW	116	"""
	117	Render a template with context.
	118
	119	Always inserts the request into the context, so you don't have to.
	120	Also stores the context if we're doing unit tests. Helpful!
	121	"""
	122	template = request.template_env.get_template(
67e8c45d	123	template_path)
e9279f21 CAW	124	context['request'] = request
	125	rendered = template.render(context)
	126
	127	if TESTS_ENABLED:
67e8c45d	128	TEMPLATE_TEST_CONTEXT[template_path] = context
e9279f21 CAW	129
	130	return rendered
	131
	132
	133	def clear_test_template_context():
	134	global TEMPLATE_TEST_CONTEXT
	135	TEMPLATE_TEST_CONTEXT = {}
	136
	137
1c63ad5d E	138	def render_to_response(request, template, context):
	139	"""Much like Django's shortcut.render()"""
	140	return Response(render_template(request, template, context))
	141
	142
9150244a E	143	def redirect(request, args, *kwargs):
9150244a E	144	"""Returns a HTTPFound(), takes a request and then urlgen params"""
af2fcba5 JW	145
	146	querystring = None
	147	if kwargs.get('querystring'):
	148	querystring = kwargs.get('querystring')
	149	del kwargs['querystring']
	150
	151	return exc.HTTPFound(
	152	location=''.join([
	153	request.urlgen(args, *kwargs),
	154	querystring if querystring else '']))
9150244a E	155
9150244a E	156
58dec5ef CAW	157	def setup_user_in_request(request):
	158	"""
	159	Examine a request and tack on a request.user parameter if that's
	160	appropriate.
	161	"""
	162	if not request.session.has_key('user_id'):
59dd5c7e	163	request.user = None
58dec5ef CAW	164	return
58dec5ef CAW	165
5d6840a0	166	user = None
6648c52b	167	user = request.app.db.User.one(
254bc431	168	{'_id': ObjectId(request.session['user_id'])})
5d6840a0	169
c74e1462 CAW	170	if not user:
	171	# Something's wrong... this user doesn't exist? Invalidate
	172	# this session.
58dec5ef	173	request.session.invalidate()
5d6840a0 CAW	174
5d6840a0 CAW	175	request.user = user
cb8ea0fe CAW	176
	177
	178	def import_component(import_string):
	179	"""
	180	Import a module component defined by STRING. Probably a method,
	181	class, or global variable.
	182
	183	Args:
	184	- import_string: a string that defines what to import. Written
	185	in the format of "module1.module2:component"
	186	"""
	187	module_name, func_name = import_string.split(':', 1)
	188	__import__(module_name)
	189	module = sys.modules[module_name]
	190	func = getattr(module, func_name)
	191	return func
4d4f6050	192
0546833c AW	193	_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{\|},.]+')
	194
	195	def slugify(text, delim=u'-'):
	196	"""
	197	Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/
	198	"""
	199	result = []
	200	for word in _punct_re.split(text.lower()):
	201	word = word.encode('translit/long')
	202	if word:
	203	result.append(word)
	204	return unicode(delim.join(result))
4d4f6050 CAW	205
	206	### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	207	### Special email test stuff begins HERE
	208	### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	209
	210	# We have two "test inboxes" here:
	211	#
	212	# EMAIL_TEST_INBOX:
	213	# ----------------
	214	# If you're writing test views, you'll probably want to check this.
	215	# It contains a list of MIMEText messages.
	216	#
	217	# EMAIL_TEST_MBOX_INBOX:
	218	# ----------------------
	219	# This collects the messages from the FakeMhost inbox. It's reslly
	220	# just here for testing the send_email method itself.
	221	#
	222	# Anyway this contains:
	223	# - from
	224	# - to: a list of email recipient addresses
	225	# - message: not just the body, but the whole message, including
	226	# headers, etc.
	227	#
	228	# *IMPORTANT!*
	229	# ----------------
	230	# Before running tests that call functions which send email, you should
	231	# always call _clear_test_inboxes() to "wipe" the inboxes clean.
	232
	233	EMAIL_TEST_INBOX = []
	234	EMAIL_TEST_MBOX_INBOX = []
	235
	236
	237	class FakeMhost(object):
	238	"""
	239	Just a fake mail host so we can capture and test messages
	240	from send_email
	241	"""
	242	def connect(self):
	243	pass
	244
	245	def sendmail(self, from_addr, to_addrs, message):
	246	EMAIL_TEST_MBOX_INBOX.append(
	247	{'from': from_addr,
	248	'to': to_addrs,
	249	'message': message})
	250
	251	def _clear_test_inboxes():
	252	global EMAIL_TEST_INBOX
	253	global EMAIL_TEST_MBOX_INBOX
	254	EMAIL_TEST_INBOX = []
	255	EMAIL_TEST_MBOX_INBOX = []
	256
	257	### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	258	### </Special email test stuff>
	259	### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	260
	261	def send_email(from_addr, to_addrs, subject, message_body):
61ec968b CAW	262	"""
	263	Simple email sending wrapper, use this so we can capture messages
	264	for unit testing purposes.
	265
	266	Args:
	267	- from_addr: address you're sending the email from
	268	- to_addrs: list of recipient email addresses
	269	- subject: subject of the email
	270	- message_body: email body text
	271	"""
4d4f6050	272	# TODO: make a mock mhost if testing is enabled
6ae8b541	273	if TESTS_ENABLED or mg_globals.app_config['email_debug_mode']:
4d4f6050	274	mhost = FakeMhost()
6ae8b541	275	elif not mg_globals.app_config['email_debug_mode']:
4d4f6050 CAW	276	mhost = smtplib.SMTP()
	277
	278	mhost.connect()
	279
	280	message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
	281	message['Subject'] = subject
	282	message['From'] = from_addr
	283	message['To'] = ', '.join(to_addrs)
	284
	285	if TESTS_ENABLED:
	286	EMAIL_TEST_INBOX.append(message)
	287
6ae8b541	288	if mg_globals.app_config['email_debug_mode']:
29f3fb70 CAW	289	print u"===== Email ====="
	290	print u"From address: %s" % message['From']
	291	print u"To addresses: %s" % message['To']
	292	print u"Subject: %s" % message['Subject']
	293	print u"-- Body: --"
	294	print message.get_payload(decode=True)
	295
21919313	296	return mhost.sendmail(from_addr, to_addrs, message.as_string())
20c834ff	297
8b28bee4 CAW	298
	299	###################
	300	# Translation tools
	301	###################
	302
	303
b77eec65	304	TRANSLATIONS_PATH = pkg_resources.resource_filename(
145922b8	305	'mediagoblin', 'i18n')
b77eec65 CAW	306
b77eec65 CAW	307
8b28bee4 CAW	308	def locale_to_lower_upper(locale):
	309	"""
	310	Take a locale, regardless of style, and format it like "en-us"
	311	"""
	312	if '-' in locale:
	313	lang, country = locale.split('-', 1)
	314	return '%s_%s' % (lang.lower(), country.upper())
	315	elif '_' in locale:
	316	lang, country = locale.split('_', 1)
	317	return '%s_%s' % (lang.lower(), country.upper())
	318	else:
	319	return locale.lower()
	320
	321
	322	def locale_to_lower_lower(locale):
	323	"""
	324	Take a locale, regardless of style, and format it like "en_US"
	325	"""
	326	if '_' in locale:
	327	lang, country = locale.split('_', 1)
	328	return '%s-%s' % (lang.lower(), country.lower())
	329	else:
	330	return locale.lower()
	331
	332
	333	def get_locale_from_request(request):
	334	"""
	335	Figure out what target language is most appropriate based on the
	336	request
	337	"""
	338	request_form = request.GET or request.POST
	339
	340	if request_form.has_key('lang'):
	341	return locale_to_lower_upper(request_form['lang'])
	342
	343	accept_lang_matches = request.accept_language.best_matches()
	344
	345	# Your routing can explicitly specify a target language
376e6ef2 CAW	346	if request.matchdict.has_key('locale'):
376e6ef2 CAW	347	target_lang = request.matchdict['locale']
8b28bee4 CAW	348	elif request.session.has_key('target_lang'):
	349	target_lang = request.session['target_lang']
	350	# Pull the first acceptable language
	351	elif accept_lang_matches:
	352	target_lang = accept_lang_matches[0]
	353	# Fall back to English
	354	else:
	355	target_lang = 'en'
	356
0e0e3d9a	357	return locale_to_lower_upper(target_lang)
b77eec65 CAW	358
b77eec65 CAW	359
a68ee555 CAW	360	# A super strict version of the lxml.html cleaner class
	361	HTML_CLEANER = Cleaner(
	362	scripts=True,
	363	javascript=True,
	364	comments=True,
	365	style=True,
	366	links=True,
	367	page_structure=True,
	368	processing_instructions=True,
	369	embedded=True,
	370	frames=True,
	371	forms=True,
	372	annoying_tags=True,
	373	allow_tags=[
	374	'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
	375	remove_unknown_tags=False, # can't be used with allow_tags
	376	safe_attrs_only=True,
	377	add_nofollow=True, # for now
	378	host_whitelist=(),
	379	whitelist_tags=set([]))
	380
	381
	382	def clean_html(html):
4fd18da0 CAW	383	# clean_html barfs on an empty string
	384	if not html:
	385	return u''
	386
a68ee555 CAW	387	return HTML_CLEANER.clean_html(html)
	388
	389
0712a06d	390	def convert_to_tag_list_of_dicts(tag_string):
cdf538bd	391	"""
909371cd	392	Filter input from incoming string containing user tags,
4bf8e888	393
cdf538bd	394	Strips trailing, leading, and internal whitespace, and also converts
cc7ff3c5	395	the "tags" text into an array of tags
cdf538bd	396	"""
6f2e4585	397	taglist = []
cdf538bd	398	if tag_string:
cc7ff3c5 CFD	399
cc7ff3c5 CFD	400	# Strip out internal, trailing, and leading whitespace
93e3468a	401	stripped_tag_string = u' '.join(tag_string.strip().split())
cc7ff3c5 CFD	402
cc7ff3c5 CFD	403	# Split the tag string into a list of tags
10d7496d CFD	404	for tag in stripped_tag_string.split(
10d7496d CFD	405	mg_globals.app_config['tags_delimiter']):
cc7ff3c5	406
f99b5cae CFD	407	# Ignore empty or duplicate tags
f99b5cae CFD	408	if tag.strip() and tag.strip() not in [t['name'] for t in taglist]:
cc7ff3c5	409
1b89b817 CAW	410	taglist.append({'name': tag.strip(),
1b89b817 CAW	411	'slug': slugify(tag.strip())})
6f2e4585	412	return taglist
cdf538bd CFD	413
cdf538bd CFD	414
def media_tags_as_string(media_entry_tags):
    """
    Build the delimiter-joined tag string for a media item.

    Takes the tags as a list of dicts (each with a 'name' key) and joins
    the names using the configured ``tags_delimiter``.  Returns an empty
    string when there are no tags.

    This is the opposite of convert_to_tag_list_of_dicts
    """
    if not media_entry_tags:
        return ''

    delimiter = mg_globals.app_config['tags_delimiter']
    names = [entry['name'] for entry in media_entry_tags]
    return delimiter.join(names)
	426
# Message template used by tag_length_validator.  The first %s receives
# the maximum tag length, the second the comma-separated offending tags.
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')
	429
	430	def tag_length_validator(form, field):
	431	"""
	432	Make sure tags do not exceed the maximum tag length.
	433	"""
0712a06d	434	tags = convert_to_tag_list_of_dicts(field.data)
909371cd	435	too_long_tags = [
0712a06d CFD	436	tag['name'] for tag in tags
0712a06d CFD	437	if len(tag['name']) > mg_globals.app_config['tags_max_length']]
909371cd CFD	438
	439	if too_long_tags:
	440	raise wtforms.ValidationError(
10d7496d CFD	441	TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], \
10d7496d CFD	442	', '.join(too_long_tags)))
4bf8e888 CAW	443
4bf8e888 CAW	444
# Module-level Markdown converter used by cleaned_markdown_conversion();
# it is created once at import time with safe_mode='escape'.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
4bf8e888 CAW	446
	447	def cleaned_markdown_conversion(text):
	448	"""
	449	Take a block of text, run it through MarkDown, and clean its HTML.
	450	"""
82688846 CAW	451	# Markdown will do nothing with and clean_html can do nothing with
	452	# an empty string :)
	453	if not text:
	454	return u''
	455
4bf8e888 CAW	456	return clean_html(MARKDOWN_INSTANCE.convert(text))
	457
	458
# Cache of gettext translation objects already built, keyed by locale.
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    A translation object previously cached in SETUP_GETTEXTS is reused;
    otherwise one is built with gettext.translation() (with
    fallback=True).  The resulting object is then installed through
    mg_globals.setup_globals(translations=...).

    Args:
     - locale: the locale name to set translations up for
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        # Only locales babel knows about get cached (presumably so that
        # arbitrary bogus locale names cannot grow the cache -- confirm).
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)
ae85ed0f BK	481
ae85ed0f BK	482
# Force the 'en' locale to be set up when this module is imported,
# before anything else, so that mg_globals.translations is never None.
setup_gettext('en')
	486
	487
	488	def pass_to_ugettext(args, *kwargs):
	489	"""
	490	Pass a translation on to the appropriate ugettext method.
	491
	492	The reason we can't have a global ugettext method is because
	493	mg_globals gets swapped out by the application per-request.
	494	"""
	495	return mg_globals.translations.ugettext(
	496	args, *kwargs)
	497
	498
def lazy_pass_to_ugettext(*args, **kwargs):
    """
    Lazily pass to ugettext.

    This is useful if you have to define a translation on a module
    level but you need it to not translate until the time that it's
    used as a string.

    Returns a LazyProxy wrapping pass_to_ugettext together with the
    given positional and keyword arguments.
    """
    return LazyProxy(pass_to_ugettext, *args, **kwargs)
	508
	509
	510	def pass_to_ngettext(args, *kwargs):
	511	"""
	512	Pass a translation on to the appropriate ngettext method.
	513
	514	The reason we can't have a global ngettext method is because
	515	mg_globals gets swapped out by the application per-request.
	516	"""
	517	return mg_globals.translations.ngettext(
	518	args, *kwargs)
	519
	520
	521	def lazy_pass_to_ngettext(args, *kwargs):
	522	"""
	523	Lazily pass to ngettext.
	524
	525	This is useful if you have to define a translation on a module
	526	level but you need it to not translate until the time that it's
	527	used as a string.
	528	"""
	529	return LazyProxy(pass_to_ngettext, args, *kwargs)
	530
	531
	532	def fake_ugettext_passthrough(string):
	533	"""
	534	Fake a ugettext call for extraction's sake ;)
	535
	536	In wtforms there's a separate way to define a method to translate
	537	things... so we just need to mark up the text so that it can be
	538	extracted, not so that it's actually run through gettext.
	539	"""
	540	return string
	541
	542
# Number of objects shown per page when a Pagination is created without
# an explicit per_page.
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
             jump_to_id=False),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
                 jump_to_id=False):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - per_page: number of objects per page
         - cursor: db cursor
         - jump_to_id: ObjectId, sets the page to the page containing the
           object with _id == jump_to_id.  If no such object is found the
           requested page is kept.
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()
        self.active_id = None

        if jump_to_id:
            # Scan a copy so that self.cursor itself is not iterated
            scan_cursor = copy.copy(self.cursor)

            for position, doc in enumerate(scan_cursor):
                if doc['_id'] == jump_to_id:
                    # position is zero-based; pages are one-based
                    self.page = 1 + position // self.per_page
                    self.active_id = jump_to_id
                    break

    def __call__(self):
        """
        Returns slice of objects for the requested page

        The slice is expressed by applying skip() and limit() to the
        cursor, whose result is returned.
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        """
        Total number of pages, rounding a partial last page up
        """
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """
        True if there is a page before the current one
        """
        return self.page > 1

    @property
    def has_next(self):
        """
        True if there is a page after the current one
        """
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Iterate over the page numbers to show in a pagination widget

        Yields the page numbers within left_edge of the start, within
        right_edge of the end, and in the window around the current page
        given by left_current / right_current.  A single None is yielded
        wherever page numbers are skipped.
        """
        last = 0
        for num in xrange(1, self.pages + 1):
            near_start = num <= left_edge
            near_current = (num > self.page - left_current - 1 and
                            num < self.page + right_current)
            near_end = num > self.pages - right_edge

            if near_start or near_current or near_end:
                # A jump in numbering means pages were left out
                if last + 1 != num:
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        get_params is copied (or replaced by an empty dict when falsy)
        before 'page' is set, so the caller's mapping is not modified.
        """
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)