mediagoblin/processing/__init__.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 from collections import OrderedDict
  18 import logging
  19 import os
  20
  21 from mediagoblin import mg_globals as mgg
  22 from mediagoblin.db.util import atomic_update
  23 from mediagoblin.db.models import MediaEntry
  24 from mediagoblin.tools.pluginapi import hook_handle
  25 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
  26
  27 _log = logging.getLogger(__name__)
  28
  29
  30 class ProgressCallback(object):
  31     def __init__(self, entry):
  32         self.entry = entry
  33
  34     def __call__(self, progress):
  35         if progress:
  36             self.entry.transcoding_progress = progress
  37             self.entry.save()
  38
  39
  40 def create_pub_filepath(entry, filename):
  41     return mgg.public_store.get_unique_filepath(
  42             ['media_entries',
  43              unicode(entry.id),
  44              filename])
  45
  46
  47 class FilenameBuilder(object):
  48     """Easily slice and dice filenames.
  49
  50     Initialize this class with an original file path, then use the fill()
  51     method to create new filenames based on the original.
  52
  53     """
  54     MAX_FILENAME_LENGTH = 255  # VFAT's maximum filename length
  55
  56     def __init__(self, path):
  57         """Initialize a builder from an original file path."""
  58         self.dirpath, self.basename = os.path.split(path)
  59         self.basename, self.ext = os.path.splitext(self.basename)
  60         self.ext = self.ext.lower()
  61
  62     def fill(self, fmtstr):
  63         """Build a new filename based on the original.
  64
  65         The fmtstr argument can include the following:
  66         {basename} -- the original basename, with the extension removed
  67         {ext} -- the original extension, always lowercase
  68
  69         If necessary, {basename} will be truncated so the filename does not
  70         exceed this class' MAX_FILENAME_LENGTH in length.
  71
  72         """
  73         basename_len = (self.MAX_FILENAME_LENGTH -
  74                         len(fmtstr.format(basename='', ext=self.ext)))
  75         return fmtstr.format(basename=self.basename[:basename_len],
  76                              ext=self.ext)
  77
  78
  79
  80 class MediaProcessor(object):
  81     """A particular processor for this media type.
  82
  83     While the ProcessingManager handles all types of MediaProcessing
  84     possible for a particular media type, a MediaProcessor can be
  85     thought of as a *particular* processing action for a media type.
  86     For example, you may have separate MediaProcessors for:
  87
  88     - initial_processing: the intial processing of a media
  89     - gen_thumb: generate a thumbnail
  90     - resize: resize an image
  91     - transcode: transcode a video
  92
  93     ... etc.
  94
  95     Some information on producing a new MediaProcessor for your media type:
  96
  97     - You *must* supply a name attribute.  This must be a class level
  98       attribute, and a string.  This will be used to determine the
  99       subcommand of your process
 100     - It's recommended that you supply a class level description
 101       attribute.
 102     - Supply a media_is_eligible classmethod.  This will be used to
 103       determine whether or not a media entry is eligible to use this
 104       processor type.  See the method documentation for details.
 105     - To give "./bin/gmg reprocess run" abilities to this media type,
 106       supply both gnerate_parser and parser_to_request classmethods.
 107     - The process method will be what actually processes your media.
 108     """
 109     # You MUST override this in the child MediaProcessor!
 110     name = None
 111
 112     # Optional, but will be used in various places to describe the
 113     # action this MediaProcessor provides
 114     description = None
 115
 116     def __init__(self, manager, media_entry):
 117         self.manager = manager
 118         self.media_entry = media_entry
 119
 120         # Should be initialized at time of processing, at least
 121         self.workbench = None
 122
 123     # @with_workbench
 124     def process(self, **kwargs):
 125         """
 126         Actually process this media entry.
 127         """
 128         raise NotImplementedError
 129
 130     @classmethod
 131     def media_is_eligible(cls, media_entry):
 132         raise NotImplementedError
 133
 134     ###############################
 135     # Command line interface things
 136     ###############################
 137
 138     @classmethod
 139     def generate_parser(cls):
 140         raise NotImplementedError
 141
 142     @classmethod
 143     def args_to_request(cls, args):
 144         raise NotImplementedError
 145
 146     ##########################################
 147     # THE FUTURE: web interface things here :)
 148     ##########################################
 149
 150
 151 class ProcessingKeyError(Exception): pass
 152 class ProcessorDoesNotExist(ProcessingKeyError): pass
 153 class ProcessorNotEligible(ProcessingKeyError): pass
 154
 155
 156 class ProcessingManager(object):
 157     """Manages all the processing actions available for a media type
 158
 159     Specific processing actions, MediaProcessor subclasses, are added
 160     to the ProcessingManager.
 161     """
 162     def __init__(self):
 163         # Dict of all MediaProcessors of this media type
 164         self.processors = OrderedDict()
 165
 166     def add_processor(self, processor):
 167         """
 168         Add a processor class to this media type
 169         """
 170         name = processor.name
 171         if name is None:
 172             raise AttributeError("Processor class's .name attribute not set")
 173
 174         self.processors[name] = processor
 175
 176     def list_eligible_processors(self, entry):
 177         """
 178         List all processors that this media entry is eligible to be processed
 179         for.
 180         """
 181         return [
 182             processor
 183             for processor in self.processors.values()
 184             if processor.media_is_eligible(entry)]
 185
 186     def list_all_processors(self):
 187         return self.processors.values()
 188
 189     def gen_process_request_via_cli(self, subparser):
 190         # Got to figure out what actually goes here before I can write this properly
 191         pass
 192
 193     def get_processor(self, key, entry=None):
 194         """
 195         Get the processor with this key.
 196
 197         If entry supplied, make sure this entry is actually compatible;
 198         otherwise raise error.
 199         """
 200         try:
 201             processor = self.processors[key]
 202         except KeyError:
 203             raise ProcessorDoesNotExist(
 204                 "'%s' processor does not exist for this media type" % key)
 205
 206         if entry and not processor.media_is_eligible(entry):
 207             raise ProcessorNotEligible(
 208                 "This entry is not eligible for processor with name '%s'" % key)
 209
 210         return processor
 211
 212     def process_from_args(self, entry, reprocess_command, request):
 213         """
 214         Process a media entry.
 215         """
 216         pass
 217
 218
 219 def request_from_args(args, which_args):
 220     """
 221     Generate a request from the values of some argparse parsed args
 222     """
 223     request = {}
 224     for arg in which_args:
 225         request[arg] = getattr(args, arg)
 226
 227     return request
 228
 229
 230 class MediaEntryNotFound(Exception): pass
 231
 232
 233 def get_manager_for_type(media_type):
 234     """
 235     Get the appropriate media manager for this type
 236     """
 237     manager_class = hook_handle(('reprocess_manager', media_type))
 238     manager = manager_class()
 239
 240     return manager
 241
 242
 243 def get_entry_and_manager(media_id):
 244     """
 245     Get a MediaEntry, its media type, and its manager all in one go.
 246
 247     Returns a tuple of: `(entry, media_type, media_manager)`
 248     """
 249     entry = MediaEntry.query.filter_by(id=media_id).first()
 250     if entry is None:
 251         raise MediaEntryNotFound("Can't find media with id '%s'" % media_id)
 252
 253     manager = get_manager_for_type(entry.media_type)
 254
 255     return entry, manager
 256
 257
 258 ################################################
 259 # TODO: This ProcessingState is OUTDATED,
 260 #   and needs to be refactored into other tools!
 261 ################################################
 262
 263 class ProcessingState(object):
 264     """
 265     The first and only argument to the "processor" of a media type
 266
 267     This could be thought of as a "request" to the processor
 268     function. It has the main info for the request (media entry)
 269     and a bunch of tools for the request on it.
 270     It can get more fancy without impacting old media types.
 271     """
 272     def __init__(self, entry):
 273         self.entry = entry
 274         self.workbench = None
 275         self.orig_filename = None
 276
 277     def set_workbench(self, wb):
 278         self.workbench = wb
 279
 280     def get_orig_filename(self):
 281         """
 282         Get the a filename for the original, on local storage
 283
 284         If the media entry has a queued_media_file, use that, otherwise
 285         use the original.
 286
 287         In the future, this will return the highest quality file available
 288         if neither the original or queued file are available
 289         """
 290         if self.orig_filename is not None:
 291             return self.orig_filename
 292
 293         if self.entry.queued_media_file:
 294             orig_filepath = self.entry.queued_media_file
 295             storage = mgg.queue_store
 296         else:
 297             orig_filepath = self.entry.media_files['original']
 298             storage = mgg.public_store
 299
 300         orig_filename = self.workbench.localized_file(
 301             storage, orig_filepath,
 302             'source')
 303         self.orig_filename = orig_filename
 304         return orig_filename
 305
 306     def copy_original(self, target_name, keyname=u"original"):
 307         self.store_public(keyname, self.get_orig_filename(), target_name)
 308
 309     def store_public(self, keyname, local_file, target_name=None):
 310         if target_name is None:
 311             target_name = os.path.basename(local_file)
 312         target_filepath = create_pub_filepath(self.entry, target_name)
 313         if keyname in self.entry.media_files:
 314             _log.warn("store_public: keyname %r already used for file %r, "
 315                       "replacing with %r", keyname,
 316                       self.entry.media_files[keyname], target_filepath)
 317         mgg.public_store.copy_local_to_storage(local_file, target_filepath)
 318         self.entry.media_files[keyname] = target_filepath
 319
 320     def delete_queue_file(self):
 321         # Remove queued media file from storage and database.
 322         # queued_filepath is in the task_id directory which should
 323         # be removed too, but fail if the directory is not empty to be on
 324         # the super-safe side.
 325         queued_filepath = self.entry.queued_media_file
 326         mgg.queue_store.delete_file(queued_filepath)      # rm file
 327         mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
 328         self.entry.queued_media_file = []
 329
 330
 331 def mark_entry_failed(entry_id, exc):
 332     """
 333     Mark a media entry as having failed in its conversion.
 334
 335     Uses the exception that was raised to mark more information.  If
 336     the exception is a derivative of BaseProcessingFail then we can
 337     store extra information that can be useful for users telling them
 338     why their media failed to process.
 339
 340     Args:
 341      - entry_id: The id of the media entry
 342
 343     """
 344     # Was this a BaseProcessingFail?  In other words, was this a
 345     # type of error that we know how to handle?
 346     if isinstance(exc, BaseProcessingFail):
 347         # Looks like yes, so record information about that failure and any
 348         # metadata the user might have supplied.
 349         atomic_update(mgg.database.MediaEntry,
 350             {'id': entry_id},
 351             {u'state': u'failed',
 352              u'fail_error': unicode(exc.exception_path),
 353              u'fail_metadata': exc.metadata})
 354     else:
 355         _log.warn("No idea what happened here, but it failed: %r", exc)
 356         # Looks like no, so just mark it as failed and don't record a
 357         # failure_error (we'll assume it wasn't handled) and don't record
 358         # metadata (in fact overwrite it if somehow it had previous info
 359         # here)
 360         atomic_update(mgg.database.MediaEntry,
 361             {'id': entry_id},
 362             {u'state': u'failed',
 363              u'fail_error': None,
 364              u'fail_metadata': {}})
 365
 366
 367 class BaseProcessingFail(Exception):
 368     """
 369     Base exception that all other processing failure messages should
 370     subclass from.
 371
 372     You shouldn't call this itself; instead you should subclass it
 373     and provid the exception_path and general_message applicable to
 374     this error.
 375     """
 376     general_message = u''
 377
 378     @property
 379     def exception_path(self):
 380         return u"%s:%s" % (
 381             self.__class__.__module__, self.__class__.__name__)
 382
 383     def __init__(self, **metadata):
 384         self.metadata = metadata or {}
 385
 386
 387 class BadMediaFail(BaseProcessingFail):
 388     """
 389     Error that should be raised when an inappropriate file was given
 390     for the media type specified.
 391     """
 392     general_message = _(u'Invalid file given for media type.')