mediagoblin/processing/__init__.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 from collections import OrderedDict
  18 import logging
  19 import os
  20
  21 from mediagoblin import mg_globals as mgg
  22 from mediagoblin.db.util import atomic_update
  23 from mediagoblin.db.models import MediaEntry
  24 from mediagoblin.tools.pluginapi import hook_handle
  25 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
  26
  27 _log = logging.getLogger(__name__)
  28
  29
  30 class ProgressCallback(object):
  31     def __init__(self, entry):
  32         self.entry = entry
  33
  34     def __call__(self, progress):
  35         if progress:
  36             self.entry.transcoding_progress = progress
  37             self.entry.save()
  38
  39
  40 def create_pub_filepath(entry, filename):
  41     return mgg.public_store.get_unique_filepath(
  42             ['media_entries',
  43              unicode(entry.id),
  44              filename])
  45
  46
  47 class FilenameBuilder(object):
  48     """Easily slice and dice filenames.
  49
  50     Initialize this class with an original file path, then use the fill()
  51     method to create new filenames based on the original.
  52
  53     """
  54     MAX_FILENAME_LENGTH = 255  # VFAT's maximum filename length
  55
  56     def __init__(self, path):
  57         """Initialize a builder from an original file path."""
  58         self.dirpath, self.basename = os.path.split(path)
  59         self.basename, self.ext = os.path.splitext(self.basename)
  60         self.ext = self.ext.lower()
  61
  62     def fill(self, fmtstr):
  63         """Build a new filename based on the original.
  64
  65         The fmtstr argument can include the following:
  66         {basename} -- the original basename, with the extension removed
  67         {ext} -- the original extension, always lowercase
  68
  69         If necessary, {basename} will be truncated so the filename does not
  70         exceed this class' MAX_FILENAME_LENGTH in length.
  71
  72         """
  73         basename_len = (self.MAX_FILENAME_LENGTH -
  74                         len(fmtstr.format(basename='', ext=self.ext)))
  75         return fmtstr.format(basename=self.basename[:basename_len],
  76                              ext=self.ext)
  77
  78
  79
  80 class MediaProcessor(object):
  81     """A particular processor for this media type.
  82
  83     While the ProcessingManager handles all types of MediaProcessing
  84     possible for a particular media type, a MediaProcessor can be
  85     thought of as a *particular* processing action for a media type.
  86     For example, you may have separate MediaProcessors for:
  87
  88     - initial_processing: the intial processing of a media
  89     - gen_thumb: generate a thumbnail
  90     - resize: resize an image
  91     - transcode: transcode a video
  92
  93     ... etc.
  94
  95     Some information on producing a new MediaProcessor for your media type:
  96
  97     - You *must* supply a name attribute.  This must be a class level
  98       attribute, and a string.  This will be used to determine the
  99       subcommand of your process
 100     - It's recommended that you supply a class level description
 101       attribute.
 102     - Supply a media_is_eligible classmethod.  This will be used to
 103       determine whether or not a media entry is eligible to use this
 104       processor type.  See the method documentation for details.
 105     - To give "./bin/gmg reprocess run" abilities to this media type,
 106       supply both gnerate_parser and parser_to_request classmethods.
 107     - The process method will be what actually processes your media.
 108     """
 109     # You MUST override this in the child MediaProcessor!
 110     name = None
 111
 112     # Optional, but will be used in various places to describe the
 113     # action this MediaProcessor provides
 114     description = None
 115
 116     def __init__(self, manager, media_entry):
 117         self.manager = manager
 118         self.media_entry = media_entry
 119         self.entry_orig_state = media_entry.state
 120
 121         # Should be initialized at time of processing, at least
 122         self.workbench = None
 123
 124     def __enter__(self):
 125         self.workbench = mgg.workbench_manager.create()
 126
 127     def __exit__(self, *args):
 128         self.workbench.destroy()
 129         self.workbench = None
 130
 131     # @with_workbench
 132     def process(self, **kwargs):
 133         """
 134         Actually process this media entry.
 135         """
 136         raise NotImplementedError
 137
 138     @classmethod
 139     def media_is_eligible(cls, media_entry):
 140         raise NotImplementedError
 141
 142     ###############################
 143     # Command line interface things
 144     ###############################
 145
 146     @classmethod
 147     def generate_parser(cls):
 148         raise NotImplementedError
 149
 150     @classmethod
 151     def args_to_request(cls, args):
 152         raise NotImplementedError
 153
 154     ##########################################
 155     # THE FUTURE: web interface things here :)
 156     ##########################################
 157
 158
 159 class ProcessingKeyError(Exception): pass
 160 class ProcessorDoesNotExist(ProcessingKeyError): pass
 161 class ProcessorNotEligible(ProcessingKeyError): pass
 162
 163
 164 class ProcessingManager(object):
 165     """Manages all the processing actions available for a media type
 166
 167     Specific processing actions, MediaProcessor subclasses, are added
 168     to the ProcessingManager.
 169     """
 170     def __init__(self):
 171         # Dict of all MediaProcessors of this media type
 172         self.processors = OrderedDict()
 173
 174     def add_processor(self, processor):
 175         """
 176         Add a processor class to this media type
 177         """
 178         name = processor.name
 179         if name is None:
 180             raise AttributeError("Processor class's .name attribute not set")
 181
 182         self.processors[name] = processor
 183
 184     def list_eligible_processors(self, entry):
 185         """
 186         List all processors that this media entry is eligible to be processed
 187         for.
 188         """
 189         return [
 190             processor
 191             for processor in self.processors.values()
 192             if processor.media_is_eligible(entry)]
 193
 194     def list_all_processors(self):
 195         return self.processors.values()
 196
 197     def gen_process_request_via_cli(self, subparser):
 198         # Got to figure out what actually goes here before I can write this properly
 199         pass
 200
 201     def get_processor(self, key, entry=None):
 202         """
 203         Get the processor with this key.
 204
 205         If entry supplied, make sure this entry is actually compatible;
 206         otherwise raise error.
 207         """
 208         try:
 209             processor = self.processors[key]
 210         except KeyError:
 211             raise ProcessorDoesNotExist(
 212                 "'%s' processor does not exist for this media type" % key)
 213
 214         if entry and not processor.media_is_eligible(entry):
 215             raise ProcessorNotEligible(
 216                 "This entry is not eligible for processor with name '%s'" % key)
 217
 218         return processor
 219
 220     def process_from_args(self, entry, reprocess_command, request):
 221         """
 222         Process a media entry.
 223         """
 224         pass
 225
 226
 227 def request_from_args(args, which_args):
 228     """
 229     Generate a request from the values of some argparse parsed args
 230     """
 231     request = {}
 232     for arg in which_args:
 233         request[arg] = getattr(args, arg)
 234
 235     return request
 236
 237
 238 class MediaEntryNotFound(Exception): pass
 239
 240
 241 def get_processing_manager_for_type(media_type):
 242     """
 243     Get the appropriate media manager for this type
 244     """
 245     manager_class = hook_handle(('reprocess_manager', media_type))
 246     manager = manager_class()
 247
 248     return manager
 249
 250
 251 def get_entry_and_processing_manager(media_id):
 252     """
 253     Get a MediaEntry, its media type, and its manager all in one go.
 254
 255     Returns a tuple of: `(entry, media_type, media_manager)`
 256     """
 257     entry = MediaEntry.query.filter_by(id=media_id).first()
 258     if entry is None:
 259         raise MediaEntryNotFound("Can't find media with id '%s'" % media_id)
 260
 261     manager = get_processing_manager_for_type(entry.media_type)
 262
 263     return entry, manager
 264
 265
 266 ################################################
 267 # TODO: This ProcessingState is OUTDATED,
 268 #   and needs to be refactored into other tools!
 269 ################################################
 270
 271 class ProcessingState(object):
 272     """
 273     The first and only argument to the "processor" of a media type
 274
 275     This could be thought of as a "request" to the processor
 276     function. It has the main info for the request (media entry)
 277     and a bunch of tools for the request on it.
 278     It can get more fancy without impacting old media types.
 279     """
 280     def __init__(self, entry):
 281         self.entry = entry
 282         self.workbench = None
 283         self.orig_filename = None
 284
 285     def set_workbench(self, wb):
 286         self.workbench = wb
 287
 288     def get_orig_filename(self):
 289         """
 290         Get the a filename for the original, on local storage
 291
 292         If the media entry has a queued_media_file, use that, otherwise
 293         use the original.
 294
 295         In the future, this will return the highest quality file available
 296         if neither the original or queued file are available
 297         """
 298         if self.orig_filename is not None:
 299             return self.orig_filename
 300
 301         if self.entry.queued_media_file:
 302             orig_filepath = self.entry.queued_media_file
 303             storage = mgg.queue_store
 304         else:
 305             orig_filepath = self.entry.media_files['original']
 306             storage = mgg.public_store
 307
 308         orig_filename = self.workbench.localized_file(
 309             storage, orig_filepath,
 310             'source')
 311         self.orig_filename = orig_filename
 312         return orig_filename
 313
 314     def copy_original(self, target_name, keyname=u"original"):
 315         self.store_public(keyname, self.get_orig_filename(), target_name)
 316
 317     def store_public(self, keyname, local_file, target_name=None):
 318         if target_name is None:
 319             target_name = os.path.basename(local_file)
 320         target_filepath = create_pub_filepath(self.entry, target_name)
 321         if keyname in self.entry.media_files:
 322             _log.warn("store_public: keyname %r already used for file %r, "
 323                       "replacing with %r", keyname,
 324                       self.entry.media_files[keyname], target_filepath)
 325         mgg.public_store.copy_local_to_storage(local_file, target_filepath)
 326         self.entry.media_files[keyname] = target_filepath
 327
 328     def delete_queue_file(self):
 329         # Remove queued media file from storage and database.
 330         # queued_filepath is in the task_id directory which should
 331         # be removed too, but fail if the directory is not empty to be on
 332         # the super-safe side.
 333         queued_filepath = self.entry.queued_media_file
 334         mgg.queue_store.delete_file(queued_filepath)      # rm file
 335         mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
 336         self.entry.queued_media_file = []
 337
 338
 339 def mark_entry_failed(entry_id, exc):
 340     """
 341     Mark a media entry as having failed in its conversion.
 342
 343     Uses the exception that was raised to mark more information.  If
 344     the exception is a derivative of BaseProcessingFail then we can
 345     store extra information that can be useful for users telling them
 346     why their media failed to process.
 347
 348     Args:
 349      - entry_id: The id of the media entry
 350
 351     """
 352     # Was this a BaseProcessingFail?  In other words, was this a
 353     # type of error that we know how to handle?
 354     if isinstance(exc, BaseProcessingFail):
 355         # Looks like yes, so record information about that failure and any
 356         # metadata the user might have supplied.
 357         atomic_update(mgg.database.MediaEntry,
 358             {'id': entry_id},
 359             {u'state': u'failed',
 360              u'fail_error': unicode(exc.exception_path),
 361              u'fail_metadata': exc.metadata})
 362     else:
 363         _log.warn("No idea what happened here, but it failed: %r", exc)
 364         # Looks like no, so just mark it as failed and don't record a
 365         # failure_error (we'll assume it wasn't handled) and don't record
 366         # metadata (in fact overwrite it if somehow it had previous info
 367         # here)
 368         atomic_update(mgg.database.MediaEntry,
 369             {'id': entry_id},
 370             {u'state': u'failed',
 371              u'fail_error': None,
 372              u'fail_metadata': {}})
 373
 374
 375 class BaseProcessingFail(Exception):
 376     """
 377     Base exception that all other processing failure messages should
 378     subclass from.
 379
 380     You shouldn't call this itself; instead you should subclass it
 381     and provid the exception_path and general_message applicable to
 382     this error.
 383     """
 384     general_message = u''
 385
 386     @property
 387     def exception_path(self):
 388         return u"%s:%s" % (
 389             self.__class__.__module__, self.__class__.__name__)
 390
 391     def __init__(self, **metadata):
 392         self.metadata = metadata or {}
 393
 394
 395 class BadMediaFail(BaseProcessingFail):
 396     """
 397     Error that should be raised when an inappropriate file was given
 398     for the media type specified.
 399     """
 400     general_message = _(u'Invalid file given for media type.')