Record the original state of the media entry in the processor

[mediagoblin.git] / mediagoblin / processing / __init__.py
diff --git a/mediagoblin/processing/__init__.py b/mediagoblin/processing/__init__.py

index 738378b89717c18f58eda927d0327fa4707e9463..47f0b84e92abcc4e8d1d40383b5e8d62d0719259 100644 (file)
--- a/mediagoblin/processing/__init__.py
+++ b/mediagoblin/processing/__init__.py
@@ -14,12 +14,14 @@
  # You should have received a copy of the GNU Affero General Public License
  # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  
+from collections import OrderedDict
  import logging
  import os
  
-from mediagoblin.db.util import atomic_update
  from mediagoblin import mg_globals as mgg
-
+from mediagoblin.db.util import atomic_update
+from mediagoblin.db.models import MediaEntry
+from mediagoblin.tools.pluginapi import hook_handle
  from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
  
  _log = logging.getLogger(__name__)
@@ -74,34 +76,263 @@ class FilenameBuilder(object):
                               ext=self.ext)
  
  
+
+class MediaProcessor(object):
+    """A particular processor for this media type.
+
+    While the ProcessingManager handles all types of MediaProcessing
+    possible for a particular media type, a MediaProcessor can be
+    thought of as a *particular* processing action for a media type.
+    For example, you may have separate MediaProcessors for:
+
+    - initial_processing: the intial processing of a media
+    - gen_thumb: generate a thumbnail
+    - resize: resize an image
+    - transcode: transcode a video
+
+    ... etc.  
+
+    Some information on producing a new MediaProcessor for your media type:
+
+    - You *must* supply a name attribute.  This must be a class level
+      attribute, and a string.  This will be used to determine the
+      subcommand of your process
+    - It's recommended that you supply a class level description
+      attribute.
+    - Supply a media_is_eligible classmethod.  This will be used to
+      determine whether or not a media entry is eligible to use this
+      processor type.  See the method documentation for details.
+    - To give "./bin/gmg reprocess run" abilities to this media type,
+      supply both gnerate_parser and parser_to_request classmethods.
+    - The process method will be what actually processes your media.
+    """
+    # You MUST override this in the child MediaProcessor!
+    name = None
+
+    # Optional, but will be used in various places to describe the
+    # action this MediaProcessor provides
+    description = None
+
+    def __init__(self, manager, media_entry):
+        self.manager = manager
+        self.media_entry = media_entry
+        self.entry_orig_state = media_entry.state
+
+        # Should be initialized at time of processing, at least
+        self.workbench = None
+
+    def __enter__(self):
+        self.workbench = mgg.workbench_manager.create()
+
+    def __exit__(self, *args):
+        self.workbench.destroy()
+        self.workbench = None
+
+    # @with_workbench
+    def process(self, **kwargs):
+        """
+        Actually process this media entry.
+        """
+        raise NotImplementedError
+
+    @classmethod
+    def media_is_eligible(cls, media_entry):
+        raise NotImplementedError
+
+    ###############################
+    # Command line interface things
+    ###############################
+
+    @classmethod
+    def generate_parser(cls):
+        raise NotImplementedError
+
+    @classmethod
+    def args_to_request(cls, args):
+        raise NotImplementedError
+
+    ##########################################
+    # THE FUTURE: web interface things here :)
+    ##########################################
+
+
+class ProcessingKeyError(Exception): pass
+class ProcessorDoesNotExist(ProcessingKeyError): pass
+class ProcessorNotEligible(ProcessingKeyError): pass
+
+
+class ProcessingManager(object):
+    """Manages all the processing actions available for a media type
+
+    Specific processing actions, MediaProcessor subclasses, are added
+    to the ProcessingManager.
+    """
+    def __init__(self):
+        # Dict of all MediaProcessors of this media type
+        self.processors = OrderedDict()
+
+    def add_processor(self, processor):
+        """
+        Add a processor class to this media type
+        """
+        name = processor.name
+        if name is None:
+            raise AttributeError("Processor class's .name attribute not set")
+        
+        self.processors[name] = processor
+
+    def list_eligible_processors(self, entry):
+        """
+        List all processors that this media entry is eligible to be processed
+        for.
+        """
+        return [
+            processor
+            for processor in self.processors.values()
+            if processor.media_is_eligible(entry)]
+
+    def list_all_processors(self):
+        return self.processors.values()
+
+    def gen_process_request_via_cli(self, subparser):
+        # Got to figure out what actually goes here before I can write this properly
+        pass
+
+    def get_processor(self, key, entry=None):
+        """
+        Get the processor with this key.
+
+        If entry supplied, make sure this entry is actually compatible;
+        otherwise raise error.
+        """
+        try:
+            processor = self.processors[key]
+        except KeyError:
+            raise ProcessorDoesNotExist(
+                "'%s' processor does not exist for this media type" % key)
+
+        if entry and not processor.media_is_eligible(entry):
+            raise ProcessorNotEligible(
+                "This entry is not eligible for processor with name '%s'" % key)
+
+        return processor
+
+    def process_from_args(self, entry, reprocess_command, request):
+        """
+        Process a media entry.
+        """
+        pass
+
+
+def request_from_args(args, which_args):
+    """
+    Generate a request from the values of some argparse parsed args
+    """
+    request = {}
+    for arg in which_args:
+        request[arg] = getattr(args, arg)
+
+    return request
+
+
+class MediaEntryNotFound(Exception): pass
+
+
+def get_processing_manager_for_type(media_type):
+    """
+    Get the appropriate media manager for this type
+    """
+    manager_class = hook_handle(('reprocess_manager', media_type))
+    manager = manager_class()
+
+    return manager
+
+
+def get_entry_and_processing_manager(media_id):
+    """
+    Get a MediaEntry, its media type, and its manager all in one go.
+
+    Returns a tuple of: `(entry, media_type, media_manager)`
+    """
+    entry = MediaEntry.query.filter_by(id=media_id).first()
+    if entry is None:
+        raise MediaEntryNotFound("Can't find media with id '%s'" % media_id)
+
+    manager = get_processing_manager_for_type(entry.media_type)
+
+    return entry, manager
+
+
+################################################
+# TODO: This ProcessingState is OUTDATED,
+#   and needs to be refactored into other tools!
+################################################
+
  class ProcessingState(object):
+    """
+    The first and only argument to the "processor" of a media type
+
+    This could be thought of as a "request" to the processor
+    function. It has the main info for the request (media entry)
+    and a bunch of tools for the request on it.
+    It can get more fancy without impacting old media types.
+    """
      def __init__(self, entry):
          self.entry = entry
          self.workbench = None
-        self.queued_filename = None
-
-        # Monkey patch us onto the entry
-        entry.proc_state = self
+        self.orig_filename = None
  
      def set_workbench(self, wb):
          self.workbench = wb
  
-    def get_queued_filename(self):
+    def get_orig_filename(self):
          """
          Get the a filename for the original, on local storage
+
+        If the media entry has a queued_media_file, use that, otherwise
+        use the original.
+
+        In the future, this will return the highest quality file available
+        if neither the original or queued file are available
          """
-        if self.queued_filename is not None:
-            return self.queued_filename
-        queued_filepath = self.entry.queued_media_file
-        queued_filename = self.workbench.localized_file(
-            mgg.queue_store, queued_filepath,
+        if self.orig_filename is not None:
+            return self.orig_filename
+
+        if self.entry.queued_media_file:
+            orig_filepath = self.entry.queued_media_file
+            storage = mgg.queue_store
+        else:
+            orig_filepath = self.entry.media_files['original']
+            storage = mgg.public_store
+
+        orig_filename = self.workbench.localized_file(
+            storage, orig_filepath,
              'source')
-        self.queued_filename = queued_filename
-        return queued_filename
+        self.orig_filename = orig_filename
+        return orig_filename
+
+    def copy_original(self, target_name, keyname=u"original"):
+        self.store_public(keyname, self.get_orig_filename(), target_name)
+
+    def store_public(self, keyname, local_file, target_name=None):
+        if target_name is None:
+            target_name = os.path.basename(local_file)
+        target_filepath = create_pub_filepath(self.entry, target_name)
+        if keyname in self.entry.media_files:
+            _log.warn("store_public: keyname %r already used for file %r, "
+                      "replacing with %r", keyname,
+                      self.entry.media_files[keyname], target_filepath)
+        mgg.public_store.copy_local_to_storage(local_file, target_filepath)
+        self.entry.media_files[keyname] = target_filepath
  
      def delete_queue_file(self):
+        # Remove queued media file from storage and database.
+        # queued_filepath is in the task_id directory which should
+        # be removed too, but fail if the directory is not empty to be on
+        # the super-safe side.
          queued_filepath = self.entry.queued_media_file
-        mgg.queue_store.delete_file(queued_filepath)
+        mgg.queue_store.delete_file(queued_filepath)      # rm file
+        mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
          self.entry.queued_media_file = []