mediagoblin/processing/__init__.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 from collections import OrderedDict
  18 import logging
  19 import os
  20
  21 from mediagoblin.db.util import atomic_update
  22 from mediagoblin import mg_globals as mgg
  23
  24 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
  25
  26 _log = logging.getLogger(__name__)
  27
  28
  29 class ProgressCallback(object):
  30     def __init__(self, entry):
  31         self.entry = entry
  32
  33     def __call__(self, progress):
  34         if progress:
  35             self.entry.transcoding_progress = progress
  36             self.entry.save()
  37
  38
  39 def create_pub_filepath(entry, filename):
  40     return mgg.public_store.get_unique_filepath(
  41             ['media_entries',
  42              unicode(entry.id),
  43              filename])
  44
  45
  46 class FilenameBuilder(object):
  47     """Easily slice and dice filenames.
  48
  49     Initialize this class with an original file path, then use the fill()
  50     method to create new filenames based on the original.
  51
  52     """
  53     MAX_FILENAME_LENGTH = 255  # VFAT's maximum filename length
  54
  55     def __init__(self, path):
  56         """Initialize a builder from an original file path."""
  57         self.dirpath, self.basename = os.path.split(path)
  58         self.basename, self.ext = os.path.splitext(self.basename)
  59         self.ext = self.ext.lower()
  60
  61     def fill(self, fmtstr):
  62         """Build a new filename based on the original.
  63
  64         The fmtstr argument can include the following:
  65         {basename} -- the original basename, with the extension removed
  66         {ext} -- the original extension, always lowercase
  67
  68         If necessary, {basename} will be truncated so the filename does not
  69         exceed this class' MAX_FILENAME_LENGTH in length.
  70
  71         """
  72         basename_len = (self.MAX_FILENAME_LENGTH -
  73                         len(fmtstr.format(basename='', ext=self.ext)))
  74         return fmtstr.format(basename=self.basename[:basename_len],
  75                              ext=self.ext)
  76
  77
  78
  79 class MediaProcessor(object):
  80     """A particular processor for this media type.
  81
  82     While the ProcessingManager handles all types of MediaProcessing
  83     possible for a particular media type, a MediaProcessor can be
  84     thought of as a *particular* processing action for a media type.
  85     For example, you may have separate MediaProcessors for:
  86
  87     - initial_processing: the intial processing of a media
  88     - gen_thumb: generate a thumbnail
  89     - resize: resize an image
  90     - transcode: transcode a video
  91
  92     ... etc.
  93
  94     Some information on producing a new MediaProcessor for your media type:
  95
  96     - You *must* supply a name attribute.  This must be a class level
  97       attribute, and a string.  This will be used to determine the
  98       subcommand of your process
  99     - It's recommended that you supply a class level description
 100       attribute.
 101     - Supply a media_is_eligible classmethod.  This will be used to
 102       determine whether or not a media entry is eligible to use this
 103       processor type.  See the method documentation for details.
 104     - To give "./bin/gmg reprocess run" abilities to this media type,
 105       supply both gnerate_parser and parser_to_request classmethods.
 106     - The process method will be what actually processes your media.
 107     """
 108     # You MUST override this in the child MediaProcessor!
 109     name = None
 110
 111     # Optional, but will be used in various places to describe the
 112     # action this MediaProcessor provides
 113     description = None
 114
 115     def __init__(self, manager):
 116         self.manager = manager
 117
 118         # Should be initialized at time of processing, at least
 119         self.workbench = None
 120
 121     # @with_workbench
 122     def process(self, **kwargs):
 123         """
 124         Actually process this media entry.
 125         """
 126         raise NotImplementedError
 127
 128     @classmethod
 129     def media_is_eligibile(cls, media_entry):
 130         raise NotImplementedError
 131
 132     ###############################
 133     # Command line interface things
 134     ###############################
 135
 136     @classmethod
 137     def generate_parser(cls):
 138         raise NotImplementedError
 139
 140     @classmethod
 141     def parser_to_request(cls, parser):
 142         raise NotImplementedError
 143
 144     ##########################################
 145     # THE FUTURE: web interface things here :)
 146     ##########################################
 147
 148
 149 class ProcessingManager(object):
 150     """Manages all the processing actions available for a media type
 151
 152     Specific processing actions, MediaProcessor subclasses, are added
 153     to the ProcessingManager.
 154     """
 155     def __init__(self):
 156         # Dict of all MediaProcessors of this media type
 157         self.processors = OrderedDict()
 158
 159     def add_processor(self, processor):
 160         """
 161         Add a processor class to this media type
 162         """
 163         name = processor.name
 164         if name is None:
 165             raise AttributeError("Processor class's .name attribute not set")
 166
 167         self.processors[name] = processor
 168
 169     def list_eligible_processors(self, entry):
 170         """
 171         List all processors that this media entry is eligible to be processed
 172         for.
 173         """
 174         return [
 175             processor
 176             for processor in self.processors.values()
 177             if processor.media_is_eligible(entry)]
 178
 179     def list_all_processors(self):
 180         return self.processors.values()
 181
 182     def gen_process_request_via_cli(self, subparser):
 183         # Got to figure out what actually goes here before I can write this properly
 184         pass
 185
 186     def process(self, entry, directive, request):
 187         """
 188         Process a media entry.
 189         """
 190         pass
 191
 192
 193 class ProcessingState(object):
 194     """
 195     The first and only argument to the "processor" of a media type
 196
 197     This could be thought of as a "request" to the processor
 198     function. It has the main info for the request (media entry)
 199     and a bunch of tools for the request on it.
 200     It can get more fancy without impacting old media types.
 201     """
 202     def __init__(self, entry):
 203         self.entry = entry
 204         self.workbench = None
 205         self.orig_filename = None
 206
 207     def set_workbench(self, wb):
 208         self.workbench = wb
 209
 210     def get_orig_filename(self):
 211         """
 212         Get the a filename for the original, on local storage
 213
 214         If the media entry has a queued_media_file, use that, otherwise
 215         use the original.
 216
 217         In the future, this will return the highest quality file available
 218         if neither the original or queued file are available
 219         """
 220         if self.orig_filename is not None:
 221             return self.orig_filename
 222
 223         if self.entry.queued_media_file:
 224             orig_filepath = self.entry.queued_media_file
 225             storage = mgg.queue_store
 226         else:
 227             orig_filepath = self.entry.media_files['original']
 228             storage = mgg.public_store
 229
 230         orig_filename = self.workbench.localized_file(
 231             storage, orig_filepath,
 232             'source')
 233         self.orig_filename = orig_filename
 234         return orig_filename
 235
 236     def copy_original(self, target_name, keyname=u"original"):
 237         self.store_public(keyname, self.get_orig_filename(), target_name)
 238
 239     def store_public(self, keyname, local_file, target_name=None):
 240         if target_name is None:
 241             target_name = os.path.basename(local_file)
 242         target_filepath = create_pub_filepath(self.entry, target_name)
 243         if keyname in self.entry.media_files:
 244             _log.warn("store_public: keyname %r already used for file %r, "
 245                       "replacing with %r", keyname,
 246                       self.entry.media_files[keyname], target_filepath)
 247         mgg.public_store.copy_local_to_storage(local_file, target_filepath)
 248         self.entry.media_files[keyname] = target_filepath
 249
 250     def delete_queue_file(self):
 251         # Remove queued media file from storage and database.
 252         # queued_filepath is in the task_id directory which should
 253         # be removed too, but fail if the directory is not empty to be on
 254         # the super-safe side.
 255         queued_filepath = self.entry.queued_media_file
 256         mgg.queue_store.delete_file(queued_filepath)      # rm file
 257         mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
 258         self.entry.queued_media_file = []
 259
 260
 261 def mark_entry_failed(entry_id, exc):
 262     """
 263     Mark a media entry as having failed in its conversion.
 264
 265     Uses the exception that was raised to mark more information.  If
 266     the exception is a derivative of BaseProcessingFail then we can
 267     store extra information that can be useful for users telling them
 268     why their media failed to process.
 269
 270     Args:
 271      - entry_id: The id of the media entry
 272
 273     """
 274     # Was this a BaseProcessingFail?  In other words, was this a
 275     # type of error that we know how to handle?
 276     if isinstance(exc, BaseProcessingFail):
 277         # Looks like yes, so record information about that failure and any
 278         # metadata the user might have supplied.
 279         atomic_update(mgg.database.MediaEntry,
 280             {'id': entry_id},
 281             {u'state': u'failed',
 282              u'fail_error': unicode(exc.exception_path),
 283              u'fail_metadata': exc.metadata})
 284     else:
 285         _log.warn("No idea what happened here, but it failed: %r", exc)
 286         # Looks like no, so just mark it as failed and don't record a
 287         # failure_error (we'll assume it wasn't handled) and don't record
 288         # metadata (in fact overwrite it if somehow it had previous info
 289         # here)
 290         atomic_update(mgg.database.MediaEntry,
 291             {'id': entry_id},
 292             {u'state': u'failed',
 293              u'fail_error': None,
 294              u'fail_metadata': {}})
 295
 296
 297 class BaseProcessingFail(Exception):
 298     """
 299     Base exception that all other processing failure messages should
 300     subclass from.
 301
 302     You shouldn't call this itself; instead you should subclass it
 303     and provid the exception_path and general_message applicable to
 304     this error.
 305     """
 306     general_message = u''
 307
 308     @property
 309     def exception_path(self):
 310         return u"%s:%s" % (
 311             self.__class__.__module__, self.__class__.__name__)
 312
 313     def __init__(self, **metadata):
 314         self.metadata = metadata or {}
 315
 316
 317 class BadMediaFail(BaseProcessingFail):
 318     """
 319     Error that should be raised when an inappropriate file was given
 320     for the media type specified.
 321     """
 322     general_message = _(u'Invalid file given for media type.')