Added exception handling to the metadata format checking function.
[mediagoblin.git] / mediagoblin / processing / task.py
index 901d293b9eba6157c783bddc2e69552798ba7ad6..7f68348566b5897b68d23c0c4a8b634c08b07edd 100644 (file)
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import logging
+import urllib
+import urllib2
 
-from celery.task import Task
+import celery
+from celery.registry import tasks
 
 from mediagoblin import mg_globals as mgg
-from mediagoblin.db.util import ObjectId
-from mediagoblin.media_types import get_media_manager
-from mediagoblin.processing import mark_entry_failed, BaseProcessingFail
+from . import mark_entry_failed, BaseProcessingFail
+from mediagoblin.tools.processing import json_processing_callback
+from mediagoblin.processing import get_entry_and_processing_manager
 
 _log = logging.getLogger(__name__)
+logging.basicConfig()
+_log.setLevel(logging.DEBUG)
+
+
@celery.task(default_retry_delay=2 * 60)
def handle_push_urls(feed_url):
    """Subtask notifying the configured PuSH hubs of new content.

    When run in a separate process, celery retries this task (by default
    3 times, every 2 minutes) before giving up.

    :param feed_url: URL of the feed that was updated; sent to each hub
        as the ``hub.url`` parameter of a PubSubHubbub publish ping.
    :returns: ``None`` on success or when no hubs are configured;
        ``False`` once all retries have been exhausted.
    """
    if not mgg.app_config["push_urls"]:
        return  # Nothing to do: no PuSH hubs configured.

    _log.debug('Notifying Push servers for feed {0}'.format(feed_url))
    hub_parameters = {
        'hub.mode': 'publish',
        'hub.url': feed_url}
    hub_data = urllib.urlencode(hub_parameters)
    hub_headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "Connection": "close"}
    for hub_url in mgg.app_config["push_urls"]:
        hub_request = urllib2.Request(hub_url, hub_data, hub_headers)
        try:
            # Success is signalled by the absence of an exception; the
            # response body itself is irrelevant.
            urllib2.urlopen(hub_request)
        except (urllib2.HTTPError, urllib2.URLError) as exc:
            _log.info("PuSH url %r gave error %r", hub_url, exc)
            try:
                # Ask celery to retry the whole task later; throw=False
                # keeps the Retry exception from propagating when the
                # task runs eagerly (in-process).
                return handle_push_urls.retry(exc=exc, throw=False)
            except Exception:
                # All retries failed. Failure is no tragedy here, probably.
                _log.warn('Failed to notify PuSH server for feed {0}. '
                          'Giving up.'.format(feed_url))
                return False
 
 
 ################################
 # Media processing initial steps
 ################################
-
-class ProcessMedia(Task):
+class ProcessMedia(celery.Task):
     """
-    DEPRECATED -- This now resides in the individual media plugins
-
     Pass this entry off for processing.
     """
-    def run(self, media_id):
+    def run(self, media_id, feed_url, reprocess_action, reprocess_info=None):
         """
         Pass the media entry off to the appropriate processing function
         (for now just process_image...)
+
+        :param feed_url: The feed URL that the PuSH server needs to be
+            updated for.
+        :param reprocess: A dict containing all of the necessary reprocessing
+            info for the media_type.
         """
-        entry = mgg.database.MediaEntry.one(
-            {'_id': ObjectId(media_id)})
+        reprocess_info = reprocess_info or {}
+        entry, manager = get_entry_and_processing_manager(media_id)
 
         # Try to process, and handle expected errors.
         try:
-            #__import__(entry.media_type)
-            manager = get_media_manager(entry.media_type)
-            _log.debug('Processing {0}'.format(entry))
-            manager['processor'](entry)
-        except BaseProcessingFail, exc:
-            mark_entry_failed(entry._id, exc)
+            processor_class = manager.get_processor(reprocess_action, entry)
+
+            with processor_class(manager, entry) as processor:
+                # Initial state change has to be here because
+                # the entry.state gets recorded on processor_class init
+                entry.state = u'processing'
+                entry.save()
+
+                _log.debug('Processing {0}'.format(entry))
+
+                try:
+                    processor.process(**reprocess_info)
+                except Exception as exc:
+                    if processor.entry_orig_state == 'processed':
+                        _log.error(
+                            'Entry {0} failed to process due to the following'
+                            ' error: {1}'.format(entry.id, exc))
+                        _log.info(
+                            'Setting entry.state back to "processed"')
+                        pass
+                    else:
+                        raise
+
+            # We set the state to processed and save the entry here so there's
+            # no need to save at the end of the processing stage, probably ;)
+            entry.state = u'processed'
+            entry.save()
+
+            # Notify the PuSH servers as async task
+            if mgg.app_config["push_urls"] and feed_url:
+                handle_push_urls.subtask().delay(feed_url)
+
+            json_processing_callback(entry)
+        except BaseProcessingFail as exc:
+            mark_entry_failed(entry.id, exc)
+            json_processing_callback(entry)
             return
-        except ImportError, exc:
+
+        except ImportError as exc:
             _log.error(
                 'Entry {0} failed to process due to an import error: {1}'\
                     .format(
                     entry.title,
                     exc))
 
-            mark_entry_failed(entry._id, exc)
+            mark_entry_failed(entry.id, exc)
+            json_processing_callback(entry)
+
+        except Exception as exc:
+            _log.error('An unhandled exception was raised while'
+                    + ' processing {0}'.format(
+                        entry))
 
-        entry.state = u'processed'
-        entry.save()
+            mark_entry_failed(entry.id, exc)
+            json_processing_callback(entry)
+            raise
 
     def on_failure(self, exc, task_id, args, kwargs, einfo):
         """
@@ -76,3 +152,8 @@ class ProcessMedia(Task):
         """
         entry_id = args[0]
         mark_entry_failed(entry_id, exc)
+
+        entry = mgg.database.MediaEntry.query.filter_by(id=entry_id).first()
+        json_processing_callback(entry)
+
+tasks.register(ProcessMedia)