From 9945468603b8476f735d735dbc7505ee47ff9f42 Mon Sep 17 00:00:00 2001 From: Sebastian Spaeth Date: Thu, 29 Nov 2012 11:28:25 +0100 Subject: [PATCH] trim_whitespace meddleware plugin Our HTML output is very verbose (=whitespacy) as our templates are written with an 80 char limit and lots of newlines between blocks, variables etc.... This is a plugin that naively strips off all but the first whitespace from the HTML response. We might want to have an all-fancy html tidy interface here at some point, but it nicely decreases the HTML size about a third on some simple pages. Signed-off-by: Sebastian Spaeth --- .../plugins/trim_whitespace/README.rst | 25 +++++++ .../plugins/trim_whitespace/__init__.py | 73 +++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 mediagoblin/plugins/trim_whitespace/README.rst create mode 100644 mediagoblin/plugins/trim_whitespace/__init__.py diff --git a/mediagoblin/plugins/trim_whitespace/README.rst b/mediagoblin/plugins/trim_whitespace/README.rst new file mode 100644 index 00000000..b55ce35e --- /dev/null +++ b/mediagoblin/plugins/trim_whitespace/README.rst @@ -0,0 +1,25 @@ +======================= + Trim whitespace plugin +======================= + +Mediagoblin templates are written with 80 char limit for better +readability. However that means that the html output is very verbose +containing LOTS of whitespace. This plugin inserts a Middleware that +filters out whitespace from the returned HTML in the Response() objects. + +Simply enable this plugin by putting it somewhere where python can reach it and put its path into the [plugins] section of your mediagoblin.ini or mediagoblin_local.ini like for example this: + + [plugins] + [[mediagoblin.plugins.trim_whitespace]] + +There is no further configuration required. 
If this plugin is enabled, +all text/html documents should not have lots of whitespace in between +elements, although it does a very naive filtering right now (just keep +the first whitespace and delete all subsequent ones). + +Nonetheless, it is a useful plugin that might serve as inspiration for +other plugin writers. + +It was originally conceived by Sebastian Spaeth. It is licensed under +the GNU AGPL v3 (or any later version) license. + diff --git a/mediagoblin/plugins/trim_whitespace/__init__.py b/mediagoblin/plugins/trim_whitespace/__init__.py new file mode 100644 index 00000000..3da1e8b4 --- /dev/null +++ b/mediagoblin/plugins/trim_whitespace/__init__.py @@ -0,0 +1,73 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+from __future__ import unicode_literals +import logging +import re + +from mediagoblin import meddleware + +_log = logging.getLogger(__name__) + +class TrimWhiteSpaceMeddleware(meddleware.BaseMeddleware): + _setup_plugin_called = 0 + RE_MULTI_WHITESPACE = re.compile(b'(\s)\s+', re.M) + + def process_response(self, request, response): + """Perform very naive html tidying by removing multiple whitespaces""" + # werkzeug.BaseResponse has no content_type attr, this comes via + # werkzeug.wrappers.CommonRequestDescriptorsMixin (part of + # wrappers.Response) + if getattr(response ,'content_type', None) != 'text/html': + return + + # This is a tad more complex than needed to be able to handle + # response.data and response.body, depending on whether we have + # a werkzeug Resonse or a webob one. Let's kill webob soon! + if hasattr(response, 'body') and not hasattr(response, 'data'): + # Old-style webob Response object. + # TODO: Remove this once we transition away from webob + resp_attr = 'body' + else: + resp_attr = 'data' + # Don't flatten iterator to list when we fudge the response body + # (see werkzeug.Response documentation) + response.implicit_sequence_conversion = False + + # Set the tidied text. Very naive tidying for now, just strip all + # subsequent whitespaces (this preserves most newlines) + setattr(response, resp_attr, re.sub( + TrimWhiteSpaceMeddleware.RE_MULTI_WHITESPACE, br'\1', + getattr(response, resp_attr))) + + @classmethod + def setup_plugin(cls): + """Set up this meddleware as a plugin during 'setup' hook""" + global _log + if cls._setup_plugin_called: + _log.info('Trim whitespace plugin was already set up.') + return + + _log.debug('Trim whitespace plugin set up.') + cls._setup_plugin_called += 1 + + # Append ourselves to the list of enabled Meddlewares + meddleware.ENABLED_MEDDLEWARE.append( + '{0}:{1}'.format(cls.__module__, cls.__name__)) + + +hooks = { + 'setup': TrimWhiteSpaceMeddleware.setup_plugin + } -- 2.25.1