[mediagoblin.git] / mediagoblin / util.py

# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 Free Software Foundation, Inc
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division

from email.MIMEText import MIMEText
import gettext
import pkg_resources
import smtplib
import sys
import re
import urllib
from math import ceil, floor
import copy
import wtforms

from babel.localedata import exists
from babel.support import LazyProxy
import jinja2
import translitcodec
from webob import Response, exc
from lxml.html.clean import Cleaner
import markdown
from wtforms.form import Form

from mediagoblin import mg_globals
from mediagoblin import messages
from mediagoblin.db.util import ObjectId

from itertools import izip, count

# NOTE(review): presumably the order in which a media entry's image sizes
# are tried for display -- the consumer is not visible in this file.
DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']

# Module-wide switch: turned on by _activate_testing() and consulted by
# render_template() and send_email() to record data for unit tests.
TESTS_ENABLED = False


def _activate_testing():
    """
    Switch util.py into testing mode by setting TESTS_ENABLED to True.
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True


def clear_test_buckets():
    """
    Reset every module-level store that tests may have filled or replaced.

    This drops the cached jinja environments, rebinds both email test
    inboxes to fresh empty lists and forgets all recorded template
    contexts, so the next round of tests starts from a clean slate.
    """
    global SETUP_JINJA_ENVS, EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX

    # Cached jinja envs are thrown away because tests may redefine them.
    SETUP_JINJA_ENVS = {}

    # Two separate fresh lists; the old list objects are left untouched.
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []

    clear_test_template_context()


# Cache of already configured jinja environments, keyed by locale.
SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Set up the Jinja environment for a locale, or reuse a cached one.

    (In the future we may have another system for providing theming;
    for now this is good enough.)

    Args:
     - template_loader: jinja2 loader the new environment reads
       templates from.  Ignored when an environment for this locale is
       already cached.
     - locale: locale identifier; passed to setup_gettext() and used as
       the cache key.

    Returns:
      A jinja2.Environment with autoescaping on, the i18n extension
      wired to the current translations, and the 'fetch_messages',
      'gridify_list' and 'gridify_cursor' template globals installed.
    """
    # Runs on every call, cache hit or not, so mg_globals.translations
    # always matches the requested locale.
    setup_gettext(locale)

    # If we have a jinja environment set up with this locale, just
    # return that one.
    if locale in SETUP_JINJA_ENVS:
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader, autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    template_env.install_gettext_callables(
        mg_globals.translations.ugettext,
        mg_globals.translations.ungettext)

    # All templates will know how to ...
    # ... fetch all waiting messages and remove them from the queue
    # ... construct a grid of thumbnails or other media
    template_env.globals['fetch_messages'] = messages.fetch_messages
    template_env.globals['gridify_list'] = gridify_list
    template_env.globals['gridify_cursor'] = gridify_cursor

    # Only locales babel has data for are cached; environments for any
    # other locale string are rebuilt on every call.
    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env


# Contexts handed to render_template(), keyed by template path.  Only
# filled while unit tests are enabled.
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render the template found at template_path with the given context.

    The request is always added to the context under the 'request'
    key (the passed-in context object itself is modified).  While
    TESTS_ENABLED is set, the context is also remembered in
    TEMPLATE_TEST_CONTEXT under template_path so tests can inspect it.

    Returns the rendered template.
    """
    loaded_template = request.template_env.get_template(template_path)

    context['request'] = request
    output = loaded_template.render(context)

    if not TESTS_ENABLED:
        return output

    TEMPLATE_TEST_CONTEXT[template_path] = context
    return output


def clear_test_template_context():
    """
    Forget every template context recorded by render_template().
    """
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = dict()


def render_to_response(request, template, context):
    """
    Render a template and wrap the result in a webob Response.

    Much like Django's shortcut.render().
    """
    body = render_template(request, template, context)
    return Response(body)


def redirect(request, *args, **kwargs):
    """
    Returns a HTTPFound(), takes a request and then urlgen params.

    Args:
     - request: the current request; its urlgen() builds the target URL
     - *args, **kwargs: handed on to request.urlgen()
     - querystring: optional keyword argument; when truthy it is
       appended verbatim to the generated URL.  It is never passed to
       urlgen().
    """
    # pop() removes the key even when its value is falsy ('' or None),
    # so 'querystring' can never leak into urlgen() as a URL parameter.
    querystring = kwargs.pop('querystring', None)

    return exc.HTTPFound(
        location=''.join([
                request.urlgen(*args, **kwargs),
                querystring if querystring else '']))


def setup_user_in_request(request):
    """
    Examine a request and tack on a request.user parameter if that's
    appropriate.

    request.user is set to None when the session holds no 'user_id'.
    Otherwise it is set to whatever the User lookup for that id
    returns; if the lookup comes back empty the session is invalidated.
    """
    if 'user_id' not in request.session:
        request.user = None
        return

    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user


def import_component(import_string):
    """
    Resolve a "module:attribute" string to the object it names.

    Args:
     - import_string: what to import, written in the format of
       "module1.module2:component".  Only the first ':' separates the
       module path from the component name.

    Returns the attribute (probably a method, class, or global
    variable) looked up on the imported module.
    """
    module_path, component_name = import_string.split(':', 1)

    # Import for its side effect, then pick the module out of
    # sys.modules under its full dotted name.
    __import__(module_path)
    return getattr(sys.modules[module_path], component_name)

# Runs of these characters separate the words of a slug.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/

    The text is lowercased and split on punctuation/whitespace runs,
    each piece is encoded with the 'translit/long' codec, pieces that
    end up empty are dropped, and the rest is joined with delim.
    """
    encoded_pieces = (
        piece.encode('translit/long')
        for piece in _punct_re.split(text.lower()))
    return unicode(delim.join(piece for piece in encoded_pieces if piece))

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### Special email test stuff begins HERE
### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# We have two "test inboxes" here:
# 
# EMAIL_TEST_INBOX:
# ----------------
#   If you're writing test views, you'll probably want to check this.
#   It contains a list of MIMEText messages.
#
# EMAIL_TEST_MBOX_INBOX:
# ----------------------
#   This collects the messages from the FakeMhost inbox.  It's really
#   just here for testing the send_email method itself.
#
#   Anyway this contains:
#    - from
#    - to: a list of email recipient addresses
#    - message: not just the body, but the whole message, including
#      headers, etc.
#
# ***IMPORTANT!***
# ----------------
# Before running tests that call functions which send email, you should
# always call _clear_test_inboxes() to "wipe" the inboxes clean. 

EMAIL_TEST_INBOX = []
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Stand-in for a mail host: instead of delivering anything it records
    what send_email hands over, so the messages can be tested.
    """
    def connect(self):
        """Pretend to connect; there is nothing to do."""
        return None

    def sendmail(self, from_addr, to_addrs, message):
        """Store the outgoing message in EMAIL_TEST_MBOX_INBOX."""
        captured = {
            'from': from_addr,
            'to': to_addrs,
            'message': message}
        EMAIL_TEST_MBOX_INBOX.append(captured)

def _clear_test_inboxes():
    """
    Rebind both email test inboxes to fresh, empty lists.
    """
    global EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX, EMAIL_TEST_MBOX_INBOX = [], []

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### </Special email test stuff>
### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    While TESTS_ENABLED is set or the 'email_debug_mode' app setting is
    on, the message is handed to a FakeMhost (which only records it)
    instead of a real SMTP connection.  In debug mode the message is
    additionally printed to stdout.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text

    Returns:
      Whatever the mail host's sendmail() call returns.
    """
    debug_mode = mg_globals.app_config['email_debug_mode']
    use_fake_host = TESTS_ENABLED or debug_mode

    if use_fake_host:
        mhost = FakeMhost()
    else:
        mhost = smtplib.SMTP()

    mhost.connect()

    try:
        message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
        message['Subject'] = subject
        message['From'] = from_addr
        message['To'] = ', '.join(to_addrs)

        if TESTS_ENABLED:
            EMAIL_TEST_INBOX.append(message)

        if debug_mode:
            # Single-argument print calls behave the same whether print
            # is a statement or a function.
            print(u"===== Email =====")
            print(u"From address: %s" % message['From'])
            print(u"To addresses: %s" % message['To'])
            print(u"Subject: %s" % message['Subject'])
            print(u"-- Body: --")
            print(message.get_payload(decode=True))

        return mhost.sendmail(from_addr, to_addrs, message.as_string())
    finally:
        # Don't leak the socket of a real SMTP connection, whether or
        # not sending worked.  FakeMhost has nothing to close.
        if not use_fake_host:
            mhost.close()


###################
# Translation tools
###################


# Location of the 'i18n' resource inside the mediagoblin package;
# setup_gettext() passes it to gettext.translation() as the directory
# to search for message catalogs.
TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin', 'i18n')


def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"

    The part before the first separator is lowercased, the part after
    it uppercased, and the two are joined with an underscore.  A '-'
    anywhere in the locale takes precedence over '_' as the separator.
    A locale without either separator is simply lowercased.
    """
    for separator in ('-', '_'):
        if separator in locale:
            lang, country = locale.split(separator, 1)
            return '%s_%s' % (lang.lower(), country.upper())

    return locale.lower()


def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"

    Everything is lowercased and the first underscore, if there is
    one, becomes a dash.
    """
    return locale.lower().replace('_', '-', 1)


def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request

    The first source that has a value wins:
     1. a 'lang' form parameter (looked up in request.GET when that is
        non-empty, otherwise in request.POST)
     2. a 'locale' entry in the routing matchdict
     3. 'target_lang' stored in the session
     4. the first entry of request.accept_language.best_matches()
     5. 'en'

    Returns the chosen locale as formatted by locale_to_lower_upper().
    """
    request_form = request.GET or request.POST

    if 'lang' in request_form:
        return locale_to_lower_upper(request_form['lang'])

    accept_lang_matches = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    if 'locale' in request.matchdict:
        target_lang = request.matchdict['locale']
    elif 'target_lang' in request.session:
        target_lang = request.session['target_lang']
    # Pull the first acceptable language
    elif accept_lang_matches:
        target_lang = accept_lang_matches[0]
    # Fall back to English
    else:
        target_lang = 'en'

    return locale_to_lower_upper(target_lang)


# A super strict version of the lxml.html cleaner class
#
# One shared Cleaner instance; clean_html() below runs all markup
# through it.  The intent is that only the few formatting tags listed
# in allow_tags remain afterwards.
# NOTE(review): the exact effect of each flag is defined by
# lxml.html.clean.Cleaner -- confirm against the installed lxml version.
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False, # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True, # for now
    host_whitelist=(),
    whitelist_tags=set([]))


def clean_html(html):
    """
    Run html through HTML_CLEANER and return the cleaned markup.

    Any falsy input (empty string, None, ...) yields u'' without
    touching the cleaner.
    """
    # clean_html barfs on an empty string
    return HTML_CLEANER.clean_html(html) if html else u''


def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags,

    Strips leading and trailing whitespace, collapses every internal
    run of whitespace to a single space, and converts the "tags" text
    into a list of tags.

    Args:
     - tag_string: the raw tags text; tags are separated by the
       'tags_delimiter' app setting.  May be empty or None.

    Returns:
      A list of {'name': ..., 'slug': ...} dicts in order of first
      appearance.  Empty tags and repeated names are left out.
    """
    taglist = []
    if not tag_string:
        return taglist

    # Strip out internal, trailing, and leading whitespace
    stripped_tag_string = u' '.join(tag_string.strip().split())

    # Names already emitted, so duplicates are spotted without
    # rescanning the result list for every tag.
    seen_names = set()

    # Split the tag string into a list of tags
    for raw_tag in stripped_tag_string.split(
            mg_globals.app_config['tags_delimiter']):
        name = raw_tag.strip()

        # Ignore empty or duplicate tags
        if not name or name in seen_names:
            continue

        seen_names.add(name)
        taglist.append({'name': name,
                        'slug': slugify(name)})

    return taglist


def media_tags_as_string(media_entry_tags):
    """
    Join the names of a media item's tags (a list of dicts) into one
    string, using the 'tags_delimiter' app setting as separator.

    This is the opposite of convert_to_tag_list_of_dicts.  An empty or
    missing tag list gives ''.
    """
    if not media_entry_tags:
        return ''

    delimiter = mg_globals.app_config['tags_delimiter']
    return delimiter.join([entry['name'] for entry in media_entry_tags])

# Filled in with (maximum tag length, comma separated offending tags).
TOO_LONG_TAG_WARNING = \
    u'Tags must be shorter than %s characters.  Tags that are too long: %s'


def tag_length_validator(form, field):
    """
    wtforms validator: reject the field when any of its tags is longer
    than the 'tags_max_length' app setting.

    Raises wtforms.ValidationError naming every offending tag.
    """
    names = [
        tag['name'] for tag in convert_to_tag_list_of_dicts(field.data)]
    offenders = [
        name for name in names
        if len(name) > mg_globals.app_config['tags_max_length']]

    if not offenders:
        return

    warning = TOO_LONG_TAG_WARNING % (
        mg_globals.app_config['tags_max_length'], ', '.join(offenders))
    raise wtforms.ValidationError(warning)


# Single shared Markdown converter used by cleaned_markdown_conversion().
# NOTE(review): safe_mode='escape' presumably makes Markdown escape raw
# HTML found in the input -- confirm against the installed markdown version.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')

def cleaned_markdown_conversion(text):
    """
    Convert a block of MarkDown text to HTML and clean the result
    with clean_html().

    Falsy input (empty string, None, ...) gives u''.
    """
    if text:
        return clean_html(MARKDOWN_INSTANCE.convert(text))

    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string :)
    return u''


# Cache of gettext translation objects, keyed by locale.
SETUP_GETTEXTS = {}

def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    Looks the translation object up in SETUP_GETTEXTS, or loads the
    'mediagoblin' catalog for the locale from TRANSLATIONS_PATH (with
    fallback=True, so a missing catalog does not raise), then installs
    it as mg_globals.translations.
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        # Only locales babel has data for are cached; translations for
        # any other locale string are rebuilt on every call.
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)


# Force en to be setup before anything else so that
# mg_globals.translations is never None
setup_gettext('en')


def pass_to_ugettext(*args, **kwargs):
    """
    Forward a translation request to the ugettext method of the
    translations currently installed in mg_globals.

    The reason we can't have a global ugettext method is because
    mg_globals gets swapped out by the application per-request, so
    the lookup has to happen at call time.
    """
    current_translations = mg_globals.translations
    return current_translations.ugettext(*args, **kwargs)


def lazy_pass_to_ugettext(*args, **kwargs):
    """
    Wrap a pass_to_ugettext call in a LazyProxy and return the proxy.

    This is useful if you have to define a translation on a module
    level but you need it to not translate until the time that it's
    used as a string.
    """
    lazy_translation = LazyProxy(pass_to_ugettext, *args, **kwargs)
    return lazy_translation


def pass_to_ngettext(*args, **kwargs):
    """
    Forward a plural translation request to the ngettext method of the
    translations currently installed in mg_globals.

    The reason we can't have a global ngettext method is because
    mg_globals gets swapped out by the application per-request, so
    the lookup has to happen at call time.
    """
    # NOTE(review): get_jinja_env() installs `ungettext` for templates
    # while this forwards to `ngettext` -- confirm that is intended.
    current_translations = mg_globals.translations
    return current_translations.ngettext(*args, **kwargs)


def lazy_pass_to_ngettext(*args, **kwargs):
    """
    Wrap a pass_to_ngettext call in a LazyProxy and return the proxy.

    This is useful if you have to define a translation on a module
    level but you need it to not translate until the time that it's
    used as a string.
    """
    lazy_translation = LazyProxy(pass_to_ngettext, *args, **kwargs)
    return lazy_translation


def fake_ugettext_passthrough(string):
    """
    Return string unchanged; exists only to mark text for extraction ;)

    In wtforms there's a separate way to define a method to translate
    things, so such text only needs to be marked up for the message
    extractor rather than actually run through gettext.
    """
    return string


PAGINATION_DEFAULT_PER_PAGE = 30

class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
    jump_to_id=False), get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
                 jump_to_id=False):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - per_page: number of objects per page
         - cursor: db cursor
         - jump_to_id: ObjectId, sets the page to the page containing the object
           with _id == jump_to_id.
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()
        # _id of the object that was jumped to, if it was found
        self.active_id = None

        if jump_to_id:
            # Search a copy of the cursor rather than self.cursor itself
            cursor = copy.copy(self.cursor)

            for position, doc in enumerate(cursor):
                if doc['_id'] == jump_to_id:
                    # Zero-based position -> one-based page number
                    self.page = 1 + int(position // self.per_page)

                    self.active_id = jump_to_id
                    break

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages, counting a partly filled last page."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True when there is a page before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True when there is a page after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pager, with None marking
        each gap of left-out pages.

        Args:
         - left_edge: number of pages always shown at the start
         - left_current: number of pages shown before the current page
         - right_current: pages with a number below
           current page + right_current are shown
         - right_edge: number of pages always shown at the end
        """
        total_pages = self.pages
        last = 0
        for num in range(1, total_pages + 1):
            near_start = num <= left_edge
            near_current = (num > self.page - left_current - 1 and
                            num < self.page + right_current)
            near_end = num > total_pages - right_edge

            if near_start or near_current or near_end:
                # Pages were skipped since the last one yielded
                if last + 1 != num:
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        get_params is copied, not modified; it may be None or empty.
        """
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)


def gridify_list(this_list, num_cols=5):
    """
    Cut this_list into consecutive rows of num_cols items each and
    return the rows as a list of lists.

    The last row holds whatever is left over, so it may be shorter.
    """
    # Figure out how many rows we should have
    row_count = int(ceil(float(len(this_list)) / num_cols))

    return [
        this_list[row * num_cols:(row + 1) * num_cols]
        for row in range(row_count)]


def gridify_cursor(this_cursor, num_cols=5):
    """
    Read everything from this_cursor into a list and lay it out with
    gridify_list(): a list of rows of num_cols items each.
    """
    items = list(this_cursor)
    return gridify_list(items, num_cols)
Commit	Line	Data
8e1e744d	1	# GNU MediaGoblin -- federated, autonomous media hosting
e5572c60 ML	2	# Copyright (C) 2011 Free Software Foundation, Inc
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU Affero General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU Affero General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU Affero General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
af2fcba5 JW	17	from __future__ import division
af2fcba5 JW	18
4d4f6050	19	from email.MIMEText import MIMEText
b77eec65 CAW	20	import gettext
b77eec65 CAW	21	import pkg_resources
4d4f6050	22	import smtplib
cb8ea0fe	23	import sys
0546833c	24	import re
c5678c1a	25	import urllib
af2fcba5	26	from math import ceil, floor
c5678c1a	27	import copy
909371cd	28	import wtforms
c5678c1a	29
f99f61c6	30	from babel.localedata import exists
1c266dc3	31	from babel.support import LazyProxy
31a8ff42	32	import jinja2
0546833c	33	import translitcodec
9150244a	34	from webob import Response, exc
a68ee555	35	from lxml.html.clean import Cleaner
4bf8e888	36	import markdown
1c266dc3	37	from wtforms.form import Form
31a8ff42	38
6e7ce8d1	39	from mediagoblin import mg_globals
22646703	40	from mediagoblin import messages
c5678c1a	41	from mediagoblin.db.util import ObjectId
29f3fb70	42
af2fcba5 JW	43	from itertools import izip, count
af2fcba5 JW	44
2c9e635a JW	45	DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']
2c9e635a JW	46
4d4f6050 CAW	47	TESTS_ENABLED = False
	48	def _activate_testing():
	49	"""
	50	Call this to activate testing in util.py
	51	"""
	52	global TESTS_ENABLED
	53	TESTS_ENABLED = True
	54
	55
66471f0e CAW	56	def clear_test_buckets():
	57	"""
	58	We store some things for testing purposes that should be cleared
	59	when we want a "clean slate" of information for our next round of
	60	tests. Call this function to wipe all that stuff clean.
	61
	62	Also wipes out some other things we might redefine during testing,
	63	like the jinja envs.
	64	"""
	65	global SETUP_JINJA_ENVS
	66	SETUP_JINJA_ENVS = {}
	67
	68	global EMAIL_TEST_INBOX
	69	global EMAIL_TEST_MBOX_INBOX
	70	EMAIL_TEST_INBOX = []
	71	EMAIL_TEST_MBOX_INBOX = []
	72
	73	clear_test_template_context()
	74
	75
f99f61c6 CAW	76	SETUP_JINJA_ENVS = {}
	77
	78
0e0e3d9a CAW	79	def get_jinja_env(template_loader, locale):
	80	"""
	81	Set up the Jinja environment,
	82
	83	(In the future we may have another system for providing theming;
	84	for now this is good enough.)
	85	"""
b77eec65 CAW	86	setup_gettext(locale)
b77eec65 CAW	87
f99f61c6 CAW	88	# If we have a jinja environment set up with this locale, just
	89	# return that one.
	90	if SETUP_JINJA_ENVS.has_key(locale):
	91	return SETUP_JINJA_ENVS[locale]
	92
b77eec65	93	template_env = jinja2.Environment(
0e0e3d9a	94	loader=template_loader, autoescape=True,
44e2da2f	95	extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])
58dec5ef	96
b77eec65	97	template_env.install_gettext_callables(
84f27964	98	mg_globals.translations.ugettext,
1c266dc3	99	mg_globals.translations.ungettext)
b77eec65	100
22646703	101	# All templates will know how to ...
22646703	102	# ... fetch all waiting messages and remove them from the queue
b5017dba	103	# ... construct a grid of thumbnails or other media
22646703	104	template_env.globals['fetch_messages'] = messages.fetch_messages
b5017dba CAW	105	template_env.globals['gridify_list'] = gridify_list
b5017dba CAW	106	template_env.globals['gridify_cursor'] = gridify_cursor
22646703	107
f99f61c6 CAW	108	if exists(locale):
	109	SETUP_JINJA_ENVS[locale] = template_env
	110
b77eec65 CAW	111	return template_env
b77eec65 CAW	112
58dec5ef	113
e9279f21 CAW	114	# We'll store context information here when doing unit tests
	115	TEMPLATE_TEST_CONTEXT = {}
	116
	117
67e8c45d	118	def render_template(request, template_path, context):
e9279f21 CAW	119	"""
	120	Render a template with context.
	121
	122	Always inserts the request into the context, so you don't have to.
	123	Also stores the context if we're doing unit tests. Helpful!
	124	"""
	125	template = request.template_env.get_template(
67e8c45d	126	template_path)
e9279f21 CAW	127	context['request'] = request
	128	rendered = template.render(context)
	129
	130	if TESTS_ENABLED:
67e8c45d	131	TEMPLATE_TEST_CONTEXT[template_path] = context
e9279f21 CAW	132
	133	return rendered
	134
	135
	136	def clear_test_template_context():
	137	global TEMPLATE_TEST_CONTEXT
	138	TEMPLATE_TEST_CONTEXT = {}
	139
	140
1c63ad5d E	141	def render_to_response(request, template, context):
	142	"""Much like Django's shortcut.render()"""
	143	return Response(render_template(request, template, context))
	144
	145
9150244a E	146	def redirect(request, args, *kwargs):
9150244a E	147	"""Returns a HTTPFound(), takes a request and then urlgen params"""
af2fcba5 JW	148
	149	querystring = None
	150	if kwargs.get('querystring'):
	151	querystring = kwargs.get('querystring')
	152	del kwargs['querystring']
	153
	154	return exc.HTTPFound(
	155	location=''.join([
	156	request.urlgen(args, *kwargs),
	157	querystring if querystring else '']))
9150244a E	158
9150244a E	159
58dec5ef CAW	160	def setup_user_in_request(request):
	161	"""
	162	Examine a request and tack on a request.user parameter if that's
	163	appropriate.
	164	"""
	165	if not request.session.has_key('user_id'):
59dd5c7e	166	request.user = None
58dec5ef CAW	167	return
58dec5ef CAW	168
5d6840a0	169	user = None
6648c52b	170	user = request.app.db.User.one(
254bc431	171	{'_id': ObjectId(request.session['user_id'])})
5d6840a0	172
c74e1462 CAW	173	if not user:
	174	# Something's wrong... this user doesn't exist? Invalidate
	175	# this session.
58dec5ef	176	request.session.invalidate()
5d6840a0 CAW	177
5d6840a0 CAW	178	request.user = user
cb8ea0fe CAW	179
	180
	181	def import_component(import_string):
	182	"""
	183	Import a module component defined by STRING. Probably a method,
	184	class, or global variable.
	185
	186	Args:
	187	- import_string: a string that defines what to import. Written
	188	in the format of "module1.module2:component"
	189	"""
	190	module_name, func_name = import_string.split(':', 1)
	191	__import__(module_name)
	192	module = sys.modules[module_name]
	193	func = getattr(module, func_name)
	194	return func
4d4f6050	195
0546833c AW	196	_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{\|},.]+')
	197
	198	def slugify(text, delim=u'-'):
	199	"""
	200	Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/
	201	"""
	202	result = []
	203	for word in _punct_re.split(text.lower()):
	204	word = word.encode('translit/long')
	205	if word:
	206	result.append(word)
	207	return unicode(delim.join(result))
4d4f6050 CAW	208
	209	### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	210	### Special email test stuff begins HERE
	211	### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	212
	213	# We have two "test inboxes" here:
	214	#
	215	# EMAIL_TEST_INBOX:
	216	# ----------------
	217	# If you're writing test views, you'll probably want to check this.
	218	# It contains a list of MIMEText messages.
	219	#
	220	# EMAIL_TEST_MBOX_INBOX:
	221	# ----------------------
	222	# This collects the messages from the FakeMhost inbox. It's reslly
	223	# just here for testing the send_email method itself.
	224	#
	225	# Anyway this contains:
	226	# - from
	227	# - to: a list of email recipient addresses
	228	# - message: not just the body, but the whole message, including
	229	# headers, etc.
	230	#
	231	# *IMPORTANT!*
	232	# ----------------
	233	# Before running tests that call functions which send email, you should
	234	# always call _clear_test_inboxes() to "wipe" the inboxes clean.
	235
	236	EMAIL_TEST_INBOX = []
	237	EMAIL_TEST_MBOX_INBOX = []
	238
	239
	240	class FakeMhost(object):
	241	"""
	242	Just a fake mail host so we can capture and test messages
	243	from send_email
	244	"""
	245	def connect(self):
	246	pass
	247
	248	def sendmail(self, from_addr, to_addrs, message):
	249	EMAIL_TEST_MBOX_INBOX.append(
	250	{'from': from_addr,
	251	'to': to_addrs,
	252	'message': message})
	253
	254	def _clear_test_inboxes():
	255	global EMAIL_TEST_INBOX
	256	global EMAIL_TEST_MBOX_INBOX
	257	EMAIL_TEST_INBOX = []
	258	EMAIL_TEST_MBOX_INBOX = []
	259
	260	### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	261	### </Special email test stuff>
	262	### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	263
	264	def send_email(from_addr, to_addrs, subject, message_body):
61ec968b CAW	265	"""
	266	Simple email sending wrapper, use this so we can capture messages
	267	for unit testing purposes.
	268
	269	Args:
	270	- from_addr: address you're sending the email from
	271	- to_addrs: list of recipient email addresses
	272	- subject: subject of the email
	273	- message_body: email body text
	274	"""
4d4f6050	275	# TODO: make a mock mhost if testing is enabled
6ae8b541	276	if TESTS_ENABLED or mg_globals.app_config['email_debug_mode']:
4d4f6050	277	mhost = FakeMhost()
6ae8b541	278	elif not mg_globals.app_config['email_debug_mode']:
4d4f6050 CAW	279	mhost = smtplib.SMTP()
	280
	281	mhost.connect()
	282
	283	message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
	284	message['Subject'] = subject
	285	message['From'] = from_addr
	286	message['To'] = ', '.join(to_addrs)
	287
	288	if TESTS_ENABLED:
	289	EMAIL_TEST_INBOX.append(message)
	290
6ae8b541	291	if mg_globals.app_config['email_debug_mode']:
29f3fb70 CAW	292	print u"===== Email ====="
	293	print u"From address: %s" % message['From']
	294	print u"To addresses: %s" % message['To']
	295	print u"Subject: %s" % message['Subject']
	296	print u"-- Body: --"
	297	print message.get_payload(decode=True)
	298
21919313	299	return mhost.sendmail(from_addr, to_addrs, message.as_string())
20c834ff	300
8b28bee4 CAW	301
	302	###################
	303	# Translation tools
	304	###################
	305
	306
b77eec65	307	TRANSLATIONS_PATH = pkg_resources.resource_filename(
145922b8	308	'mediagoblin', 'i18n')
b77eec65 CAW	309
b77eec65 CAW	310
8b28bee4 CAW	311	def locale_to_lower_upper(locale):
	312	"""
	313	Take a locale, regardless of style, and format it like "en-us"
	314	"""
	315	if '-' in locale:
	316	lang, country = locale.split('-', 1)
	317	return '%s_%s' % (lang.lower(), country.upper())
	318	elif '_' in locale:
	319	lang, country = locale.split('_', 1)
	320	return '%s_%s' % (lang.lower(), country.upper())
	321	else:
	322	return locale.lower()
	323
	324
	325	def locale_to_lower_lower(locale):
	326	"""
	327	Take a locale, regardless of style, and format it like "en_US"
	328	"""
	329	if '_' in locale:
	330	lang, country = locale.split('_', 1)
	331	return '%s-%s' % (lang.lower(), country.lower())
	332	else:
	333	return locale.lower()
	334
	335
	336	def get_locale_from_request(request):
	337	"""
	338	Figure out what target language is most appropriate based on the
	339	request
	340	"""
	341	request_form = request.GET or request.POST
	342
	343	if request_form.has_key('lang'):
	344	return locale_to_lower_upper(request_form['lang'])
	345
	346	accept_lang_matches = request.accept_language.best_matches()
	347
	348	# Your routing can explicitly specify a target language
376e6ef2 CAW	349	if request.matchdict.has_key('locale'):
376e6ef2 CAW	350	target_lang = request.matchdict['locale']
8b28bee4 CAW	351	elif request.session.has_key('target_lang'):
	352	target_lang = request.session['target_lang']
	353	# Pull the first acceptable language
	354	elif accept_lang_matches:
	355	target_lang = accept_lang_matches[0]
	356	# Fall back to English
	357	else:
	358	target_lang = 'en'
	359
0e0e3d9a	360	return locale_to_lower_upper(target_lang)
b77eec65 CAW	361
b77eec65 CAW	362
a68ee555 CAW	363	# A super strict version of the lxml.html cleaner class
	364	HTML_CLEANER = Cleaner(
	365	scripts=True,
	366	javascript=True,
	367	comments=True,
	368	style=True,
	369	links=True,
	370	page_structure=True,
	371	processing_instructions=True,
	372	embedded=True,
	373	frames=True,
	374	forms=True,
	375	annoying_tags=True,
	376	allow_tags=[
	377	'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
	378	remove_unknown_tags=False, # can't be used with allow_tags
	379	safe_attrs_only=True,
	380	add_nofollow=True, # for now
	381	host_whitelist=(),
	382	whitelist_tags=set([]))
	383
	384
	385	def clean_html(html):
4fd18da0 CAW	386	# clean_html barfs on an empty string
	387	if not html:
	388	return u''
	389
a68ee555 CAW	390	return HTML_CLEANER.clean_html(html)
	391
	392
def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags.

    Leading and trailing whitespace is stripped and every internal run of
    whitespace is collapsed to a single space.  The result is split on
    mg_globals.app_config['tags_delimiter']; empty and duplicate tags are
    dropped, keeping the first occurrence and the original order.

    Returns:
      A list of dicts of the form {'name': <tag>, 'slug': slugify(<tag>)}.
      A falsy tag_string yields an empty list.
    """
    taglist = []
    if not tag_string:
        return taglist

    # Strip leading/trailing whitespace and collapse internal whitespace
    stripped_tag_string = u' '.join(tag_string.strip().split())

    # Names already emitted; a set keeps the duplicate check O(1) per tag
    # instead of rebuilding a list of names on every iteration.
    seen_names = set()

    # Split the tag string into a list of tags
    for raw_tag in stripped_tag_string.split(
            mg_globals.app_config['tags_delimiter']):
        name = raw_tag.strip()

        # Ignore empty or duplicate tags
        if not name or name in seen_names:
            continue

        seen_names.add(name)
        taglist.append({'name': name,
                        'slug': slugify(name)})

    return taglist
cdf538bd CFD	416
cdf538bd CFD	417
def media_tags_as_string(media_entry_tags):
    """
    Build a single string out of a media item's tags (a list of dicts).

    The 'name' of every tag is joined with
    mg_globals.app_config['tags_delimiter'].  An empty or missing tag list
    gives an empty string.

    This is the opposite of convert_to_tag_list_of_dicts
    """
    if not media_entry_tags:
        return ''

    delimiter = mg_globals.app_config['tags_delimiter']
    return delimiter.join(tag['name'] for tag in media_entry_tags)
	429
# Message template for the error raised by tag_length_validator(); it is
# filled with (maximum tag length, comma-separated offending tags).
TOO_LONG_TAG_WARNING = (
    u'Tags must be shorter than %s characters. Tags that are too long: %s')
	432
	433	def tag_length_validator(form, field):
	434	"""
	435	Make sure tags do not exceed the maximum tag length.
	436	"""
0712a06d	437	tags = convert_to_tag_list_of_dicts(field.data)
909371cd	438	too_long_tags = [
0712a06d CFD	439	tag['name'] for tag in tags
0712a06d CFD	440	if len(tag['name']) > mg_globals.app_config['tags_max_length']]
909371cd CFD	441
	442	if too_long_tags:
	443	raise wtforms.ValidationError(
10d7496d CFD	444	TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], \
10d7496d CFD	445	', '.join(too_long_tags)))
4bf8e888 CAW	446
4bf8e888 CAW	447
# Module-level Markdown converter reused by cleaned_markdown_conversion().
# NOTE(review): safe_mode='escape' presumably makes Markdown escape raw HTML
# found in the input -- confirm against the installed Markdown version.
MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')
4bf8e888 CAW	449
	450	def cleaned_markdown_conversion(text):
	451	"""
	452	Take a block of text, run it through MarkDown, and clean its HTML.
	453	"""
82688846 CAW	454	# Markdown will do nothing with and clean_html can do nothing with
	455	# an empty string :)
	456	if not text:
	457	return u''
	458
4bf8e888 CAW	459	return clean_html(MARKDOWN_INSTANCE.convert(text))
	460
	461
# Cache of gettext translation objects already built, keyed by locale.
SETUP_GETTEXTS = {}


def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale

    A translation object for the 'mediagoblin' domain is looked up in
    SETUP_GETTEXTS or, when missing, created from TRANSLATIONS_PATH with
    fallback=True.  A newly created object is only cached when babel's
    exists(locale) is true.  The result is installed via
    mg_globals.setup_globals(translations=...).
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.

    # `in` replaces dict.has_key(), which is deprecated in Python 2 and
    # removed in Python 3.
    if locale in SETUP_GETTEXTS:
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)
ae85ed0f BK	484
ae85ed0f BK	485
# Force en to be set up before anything else so that
# mg_globals.translations is never None.  This is a module-level call, so
# it runs as a side effect of importing this module.
setup_gettext('en')
	489
	490
	491	def pass_to_ugettext(args, *kwargs):
	492	"""
	493	Pass a translation on to the appropriate ugettext method.
	494
	495	The reason we can't have a global ugettext method is because
	496	mg_globals gets swapped out by the application per-request.
	497	"""
	498	return mg_globals.translations.ugettext(
	499	args, *kwargs)
	500
	501
def lazy_pass_to_ugettext(*args, **kwargs):
    """
    Lazily pass to ugettext.

    Returns a LazyProxy wrapping pass_to_ugettext with the given
    positional and keyword arguments.

    This is useful if you have to define a translation on a module
    level but you need it to not translate until the time that it's
    used as a string.
    """
    return LazyProxy(pass_to_ugettext, *args, **kwargs)
	511
	512
	513	def pass_to_ngettext(args, *kwargs):
	514	"""
	515	Pass a translation on to the appropriate ngettext method.
	516
	517	The reason we can't have a global ngettext method is because
	518	mg_globals gets swapped out by the application per-request.
	519	"""
	520	return mg_globals.translations.ngettext(
	521	args, *kwargs)
	522
	523
	524	def lazy_pass_to_ngettext(args, *kwargs):
	525	"""
	526	Lazily pass to ngettext.
	527
	528	This is useful if you have to define a translation on a module
	529	level but you need it to not translate until the time that it's
	530	used as a string.
	531	"""
	532	return LazyProxy(pass_to_ngettext, args, *kwargs)
	533
	534
	535	def fake_ugettext_passthrough(string):
	536	"""
	537	Fake a ugettext call for extraction's sake ;)
	538
	539	In wtforms there's a separate way to define a method to translate
	540	things... so we just need to mark up the text so that it can be
	541	extracted, not so that it's actually run through gettext.
	542	"""
	543	return string
	544
	545
# Number of objects per page when the caller does not pass per_page.
PAGINATION_DEFAULT_PER_PAGE = 30


class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through
    __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
             jump_to_id=False),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
                 jump_to_id=False):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - cursor: db cursor
         - per_page: number of objects per page
         - jump_to_id: ObjectId, sets the page to the page containing the
           object with _id == jump_to_id.
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()
        # Set to jump_to_id only when that object is actually found below.
        self.active_id = None

        if jump_to_id:
            # Iterate over a copy rather than self.cursor itself
            # (presumably so the stored cursor is left untouched).
            cursor = copy.copy(self.cursor)

            for increment, doc in enumerate(cursor):
                if doc['_id'] == jump_to_id:
                    # Zero-based position -> one-based page number
                    self.page = 1 + int(increment // self.per_page)

                    self.active_id = jump_to_id
                    break

    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        """Total number of pages, rounding a partial last page up."""
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        """True when the current page is not the first one."""
        return self.page > 1

    @property
    def has_next(self):
        """True when the current page is before the last one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
        Yield the page numbers to show in a pagination widget.

        Pages within left_edge of the start, within right_edge of the end,
        or in the window around the current page are yielded as numbers;
        each run of skipped pages is represented by a single None.
        """
        last = 0
        for num in range(1, self.pages + 1):
            near_start = num <= left_edge
            near_current = (num > self.page - left_current - 1 and
                            num < self.page + right_current)
            near_end = num > self.pages - right_edge

            if near_start or near_current or near_end:
                if last + 1 != num:
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """
        Get a page url by adding a page= parameter to the base url

        get_params is copied before 'page' is set, so the caller's mapping
        is not modified; a falsy get_params is treated as empty.
        """
        # urlencode lives in urllib on Python 2, urllib.parse on Python 3
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode

        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit(), using
        request.path_info as base url and request.GET as parameters.
        """
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)
b5017dba CAW	634
	635
	636	def gridify_list(this_list, num_cols=5):
	637	"""
	638	Generates a list of lists where each sub-list's length depends on
	639	the number of columns in the list
	640	"""
	641	grid = []
	642
	643	# Figure out how many rows we should have
	644	num_rows = int(ceil(float(len(this_list)) / num_cols))
	645
	646	for row_num in range(num_rows):
	647	slice_min = row_num * num_cols
	648	slice_max = (row_num + 1) * num_cols
	649
	650	row = this_list[slice_min:slice_max]
	651
	652	grid.append(row)
	653
	654	return grid
	655
	656
	657	def gridify_cursor(this_cursor, num_cols=5):
	658	"""
	659	Generates a list of lists where each sub-list's length depends on
	660	the number of columns in the list
	661	"""
	662	return gridify_list(list(this_cursor), num_cols)