From 2cfffd5ed8c054bb60c27ede4e69667f97d12b09 Mon Sep 17 00:00:00 2001
From: Sebastian Spaeth
Date: Tue, 15 Jan 2013 14:41:30 +0100
Subject: [PATCH] Make PuSHing the PubSubHubbub server an async task (#436,
 #585)
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Notifying the PuSH servers had three problems:

1) It was done immediately after sending the processing task to
   celery, so if celery ran in a separate process we would notify the
   PuSH servers before the new media was processed and visible. (#436)

2) The notification code was called in submit/views.py, so submitting
   via the API never resulted in notifications. (#585)

3) If notifying the PuSH server failed, we would never retry.

The solution is to make the PuSH notification an asynchronous subtask.
This way:

1) it only runs once async processing has finished,

2) it sits in the main processing code path, so API submissions
   trigger notifications too, and

3) we retry up to three times on failure before giving up, waiting two
   minutes between attempts when celery runs in a separate process.

The only downside is that the celery worker needs internet access to
ping the PuSH server. If that is a problem, the task would have to be
routed to a special group of celery workers that do have internet
access.

As a side effect, I believe this removes the limitation that prevented
us from upgrading celery.

Signed-off-by: Sebastian Spaeth
---
 mediagoblin/plugins/api/views.py |  2 +-
 mediagoblin/processing/task.py   | 48 ++++++++++++++++++++++++++++++--
 mediagoblin/submit/lib.py        | 41 ++++-----------------------
 mediagoblin/submit/views.py      |  8 ++----
 4 files changed, 54 insertions(+), 45 deletions(-)
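In outline, the change adopts the subtask-with-retry pattern sketched
below. This is a minimal, self-contained sketch using the same
celery-era API as the patch; the task names and the ping_hubs stub are
illustrative only and do not appear in the patch itself:

    from celery import task

    def ping_hubs(feed_url):
        # stand-in for the real POST to each configured PuSH hub
        pass

    @task.task(default_retry_delay=2 * 60)  # max_retries defaults to 3
    def notify_hubs(feed_url):
        try:
            ping_hubs(feed_url)
        except IOError as exc:
            # re-queue this task; celery waits default_retry_delay
            # seconds between attempts and stops after max_retries
            notify_hubs.retry(exc=exc, throw=False)

    @task.task
    def process(media_id, feed_url):
        # ... heavy media processing happens here; only on success:
        notify_hubs.subtask().delay(feed_url)

Because notify_hubs is queued from inside the processing task, the
ping can never race ahead of processing, and because it is a task of
its own, celery's retry machinery absorbs transient hub failures.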
diff --git a/mediagoblin/plugins/api/views.py b/mediagoblin/plugins/api/views.py
index 6aa4ef9f..7383e20d 100644
--- a/mediagoblin/plugins/api/views.py
+++ b/mediagoblin/plugins/api/views.py
@@ -86,7 +86,7 @@ def post_entry(request):
     #
     # (... don't change entry after this point to avoid race
     # conditions with changes to the document via processing code)
-    run_process_media(entry)
+    run_process_media(entry, request)
 
     return json_response(get_entry_serializable(entry, request.urlgen))
 
diff --git a/mediagoblin/processing/task.py b/mediagoblin/processing/task.py
index b29de9bd..2cdd5f1a 100644
--- a/mediagoblin/processing/task.py
+++ b/mediagoblin/processing/task.py
@@ -15,8 +15,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 import logging
+import urllib
+import urllib2
 
-from celery.task import Task
+from celery import registry, task
 
 from mediagoblin import mg_globals as mgg
 from mediagoblin.db.models import MediaEntry
@@ -28,18 +30,51 @@ logging.basicConfig()
 _log.setLevel(logging.DEBUG)
 
 
+@task.task(default_retry_delay=2 * 60)
+def handle_push_urls(feed_url):
+    """Subtask, notifying the PuSH servers of new content
+
+    Retry 3 times every 2 minutes if run in separate process before failing."""
+    if not mgg.app_config["push_urls"]:
+        return # Nothing to do
+    _log.debug('Notifying Push servers for feed {0}'.format(feed_url))
+    hubparameters = {
+        'hub.mode': 'publish',
+        'hub.url': feed_url}
+    hubdata = urllib.urlencode(hubparameters)
+    hubheaders = {
+        "Content-type": "application/x-www-form-urlencoded",
+        "Connection": "close"}
+    for huburl in mgg.app_config["push_urls"]:
+        hubrequest = urllib2.Request(huburl, hubdata, hubheaders)
+        try:
+            hubresponse = urllib2.urlopen(hubrequest)
+        except (urllib2.HTTPError, urllib2.URLError) as exc:
+            # We retry by default 3 times before failing
+            _log.info("PuSH url %r gave error %r", huburl, exc)
+            try:
+                return handle_push_urls.retry(exc=exc, throw=False)
+            except Exception as e:
+                # All retries failed, Failure is no tragedy here, probably.
+                _log.warn('Failed to notify PuSH server for feed {0}. '
+                          'Giving up.'.format(feed_url))
+                return False
+
 ################################
 # Media processing initial steps
 ################################
 
-class ProcessMedia(Task):
+class ProcessMedia(task.Task):
     """
     Pass this entry off for processing.
     """
-    def run(self, media_id):
+    def run(self, media_id, feed_url):
         """
         Pass the media entry off to the appropriate processing function
         (for now just process_image...)
+
+        :param feed_url: The feed URL that the PuSH server needs to be
+            updated for.
         """
         entry = MediaEntry.query.get(media_id)
 
@@ -58,6 +93,9 @@ class ProcessMedia(Task):
             entry.state = u'processed'
             entry.save()
 
+            # Notify the PuSH servers as async task
+            handle_push_urls.subtask().delay(feed_url)
+
             json_processing_callback(entry)
         except BaseProcessingFail as exc:
             mark_entry_failed(entry.id, exc)
@@ -97,3 +135,7 @@ class ProcessMedia(Task):
         entry = mgg.database.MediaEntry.query.filter_by(id=entry_id).first()
 
         json_processing_callback(entry)
+
+# Register the task
+process_media = registry.tasks[ProcessMedia.name]
+
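On the wire, handle_push_urls sends the standard PubSubHubbub
"publish" ping: a form-encoded POST with hub.mode=publish and hub.url
set to the feed that changed, where any 2xx reply means the hub
accepted the ping. For comparison only, the same request expressed
with the third-party requests library would look roughly like this
(a sketch; the function name and URLs are made up, and the patch
itself deliberately sticks to urllib2):

    import requests

    def ping_hub(huburl, feed_url):
        # POST a PubSubHubbub publish ping; a 2xx reply means accepted
        response = requests.post(
            huburl,
            data={'hub.mode': 'publish', 'hub.url': feed_url},
            headers={'Connection': 'close'},
            timeout=10)
        response.raise_for_status()

    ping_hub('http://hub.example.org/',
             'http://mediagoblin.example.org/u/alice/atom/')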
diff --git a/mediagoblin/submit/lib.py b/mediagoblin/submit/lib.py
index db5dfe53..ba07c6fa 100644
--- a/mediagoblin/submit/lib.py
+++ b/mediagoblin/submit/lib.py
@@ -14,16 +14,12 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
-import urllib
-import urllib2
 import logging
 import uuid
 
-from celery import registry
 from werkzeug.utils import secure_filename
 
-from mediagoblin import mg_globals
 from mediagoblin.processing import mark_entry_failed
-from mediagoblin.processing.task import ProcessMedia
+from mediagoblin.processing.task import process_media
 
 _log = logging.getLogger(__name__)
 
@@ -58,11 +54,13 @@ def prepare_queue_task(app, entry, filename):
     return queue_file
 
 
-def run_process_media(entry):
-    process_media = registry.tasks[ProcessMedia.name]
+def run_process_media(entry, request):
+    feed_url = request.urlgen(
+        'mediagoblin.user_pages.atom_feed',
+        qualified=True, user=request.user.username)
     try:
         process_media.apply_async(
-            [unicode(entry.id)], {},
+            [entry.id, feed_url], {},
             task_id=entry.queued_task_id)
     except BaseException as exc:
         # The purpose of this section is because when running in "lazy"
@@ -76,30 +74,3 @@ def run_process_media(entry):
         mark_entry_failed(entry.id, exc)
         # re-raise the exception
         raise
-
-
-def handle_push_urls(request):
-    if mg_globals.app_config["push_urls"]:
-        feed_url = request.urlgen(
-            'mediagoblin.user_pages.atom_feed',
-            qualified=True,
-            user=request.user.username)
-        hubparameters = {
-            'hub.mode': 'publish',
-            'hub.url': feed_url}
-        hubdata = urllib.urlencode(hubparameters)
-        hubheaders = {
-            "Content-type": "application/x-www-form-urlencoded",
-            "Connection": "close"}
-        for huburl in mg_globals.app_config["push_urls"]:
-            hubrequest = urllib2.Request(huburl, hubdata, hubheaders)
-            try:
-                hubresponse = urllib2.urlopen(hubrequest)
-            except urllib2.HTTPError as exc:
-                # This is not a big issue, the item will be fetched
-                # by the PuSH server next time we hit it
-                _log.warning(
-                    "push url %r gave error %r", huburl, exc.code)
-            except urllib2.URLError as exc:
-                _log.warning(
-                    "push url %r is unreachable %r", huburl, exc.reason)
diff --git a/mediagoblin/submit/views.py b/mediagoblin/submit/views.py
index 2d609b31..145b9f5e 100644
--- a/mediagoblin/submit/views.py
+++ b/mediagoblin/submit/views.py
@@ -32,8 +32,7 @@ from mediagoblin.submit import forms as submit_forms
 from mediagoblin.messages import add_message, SUCCESS
 from mediagoblin.media_types import sniff_media, \
     InvalidFileType, FileTypeNotSupported
-from mediagoblin.submit.lib import handle_push_urls, run_process_media, \
-    prepare_queue_task
+from mediagoblin.submit.lib import run_process_media, prepare_queue_task
 
 
 @require_active_login
@@ -90,10 +89,7 @@ def submit_start(request):
         #
         # (... don't change entry after this point to avoid race
         # conditions with changes to the document via processing code)
-        run_process_media(entry)
-
-        handle_push_urls(request)
-
+        run_process_media(entry, request)
         add_message(request, SUCCESS, _('Woohoo! Submitted!'))
 
         return redirect(request, "mediagoblin.user_pages.user_home",
-- 
2.25.1
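For anyone wanting to exercise the new path without a separate celery
worker: in celery's eager mode the whole chain (ProcessMedia followed
by handle_push_urls) executes inline in the calling process. A sketch,
assuming celery 3.x-era setting names:

    from celery import current_app

    # run tasks synchronously in-process; apply_async()/delay() block
    current_app.conf.CELERY_ALWAYS_EAGER = True
    # surface task exceptions in the caller instead of swallowing them
    current_app.conf.CELERY_EAGER_PROPAGATES_EXCEPTIONS = True

    # after this, run_process_media(entry, request) called from a view
    # or test will not return until both processing and the PuSH ping
    # have completed

This is only useful for local verification; in production the whole
point of the patch is that the worker runs these tasks asynchronously.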