1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse
import datetime
import logging
import os.path

import celery
import six
from celery import group

from mediagoblin import mg_globals as mgg
from mediagoblin.processing import (
    FilenameBuilder, BaseProcessingFail,
    ProgressCallback, MediaProcessor,
    ProcessingManager, request_from_args,
    get_process_filename, store_public,
    copy_original, get_entry_and_processing_manager)
from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
from mediagoblin.media_types import MissingComponents

from . import transcoders
from .util import skip_transcode, ACCEPTED_RESOLUTIONS
39 _log
= logging
.getLogger(__name__
)
40 _log
.setLevel(logging
.DEBUG
)
42 MEDIA_TYPE
= 'mediagoblin.media_types.video'
class VideoTranscodingFail(BaseProcessingFail):
    '''
    Error raised if video transcoding fails
    '''
    # Translated, user-facing failure message.
    general_message = _(u'Video transcoding failed')
def sniffer(media_file):
    '''New style sniffer, used in two-steps check; requires to have .name'''
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))
    try:
        data = transcoders.discover(media_file.name)
    except Exception as e:
        # this is usually GLib.GError, but we don't really care which one
        _log.warning(u'GStreamer: {0}'.format(six.text_type(e)))
        raise MissingComponents(u'GStreamer: {0}'.format(six.text_type(e)))
    _log.debug('Discovered: {0}'.format(data))

    if not data.get_video_streams():
        raise MissingComponents('No video streams found in this video')

    if data.get_result() != 0:  # it's 0 if success
        try:
            missing = data.get_misc().get_string('name')
            _log.warning('GStreamer: missing {0}'.format(missing))
        except AttributeError as e:
            # AttributeError happens here on gstreamer >1.4, when get_misc
            # returns None. There is a special function to get info about
            # missing plugin. This info should be printed to logs for admin and
            # showed to the user in a short and nice version
            details = data.get_missing_elements_installer_details()
            _log.warning('GStreamer: missing: {0}'.format(', '.join(details)))
            # Installer detail strings are '|'-separated; fields 3+ are the
            # short name and description shown to the user.
            missing = u', '.join([u'{0} ({1})'.format(*d.split('|')[3:])
                                  for d in details])
        raise MissingComponents(u'{0} is missing'.format(missing))

    # Sniff succeeded: report which media type handles this file.
    return MEDIA_TYPE
# Extensions GStreamer may claim to handle but that this media type refuses
# (raw photos, vector images).
EXCLUDED_EXTS = ["nef", "svg"]
def sniff_handler(media_file, filename):
    """Return MEDIA_TYPE if this file looks like a video we can process,
    otherwise None."""
    name, ext = os.path.splitext(filename)
    clean_ext = ext.lower()[1:]

    if clean_ext in EXCLUDED_EXTS:
        # We don't handle this filetype, though gstreamer might think we can
        _log.info('Refused to process {0} due to excluded extension'.format(filename))
        return None

    try:
        return sniffer(media_file)
    except Exception:
        # Any discovery failure just means "not ours" at sniff time.
        _log.error('Could not discover {0}'.format(filename))
        return None
def get_tags(stream_info):
    'gets all tags and their values from stream info'
    taglist = stream_info.get_tags()

    if not taglist:
        return {}

    # Collect (tag, first-value) pairs via GstTagList.foreach, then turn
    # them into a dict.
    tags = []
    taglist.foreach(
        lambda list, tag: tags.append((tag, list.get_value_index(tag, 0))))
    tags = dict(tags)

    # date/datetime should be converted from GDate/GDateTime to strings
    if 'date' in tags:
        date = tags['date']
        tags['date'] = "%s-%s-%s" % (
            date.year, date.month, date.day)

    if 'datetime' in tags:
        # TODO: handle timezone info; gst.get_time_zone_offset +
        # python's tzinfo should help
        dt = tags['datetime']
        try:
            tags['datetime'] = datetime.datetime(
                dt.get_year(), dt.get_month(), dt.get_day(), dt.get_hour(),
                dt.get_minute(), dt.get_second(),
                dt.get_microsecond()).isoformat()
        except Exception:
            # GDateTime may hold out-of-range fields; drop the value rather
            # than fail metadata extraction.
            tags['datetime'] = None

    for k, v in tags.copy().items():
        # types below are accepted by json; others must not present
        if not isinstance(v, (dict, list, six.string_types, int, float, bool,
                              type(None))):
            del tags[k]

    return dict(tags)
def store_metadata(media_entry, metadata):
    """
    Store metadata from this video for this media entry.

    ``metadata`` is a GstPbutils discoverer info object; its audio/video
    stream info and container tags are flattened into a JSON-friendly dict
    saved as the entry's ``orig_metadata``.
    """
    stored_metadata = dict()

    audio_info_list = metadata.get_audio_streams()
    if audio_info_list:
        stored_metadata['audio'] = []
    for audio_info in audio_info_list:
        stored_metadata['audio'].append(
            {
                'channels': audio_info.get_channels(),
                'bitrate': audio_info.get_bitrate(),
                'depth': audio_info.get_depth(),
                # NOTE(review): 'languange' typo is preserved — it is the
                # key already stored in existing media_data rows.
                'languange': audio_info.get_language(),
                'sample_rate': audio_info.get_sample_rate(),
                'tags': get_tags(audio_info)
            })

    video_info_list = metadata.get_video_streams()
    if video_info_list:
        stored_metadata['video'] = []
    for video_info in video_info_list:
        stored_metadata['video'].append(
            {
                'width': video_info.get_width(),
                'height': video_info.get_height(),
                'bitrate': video_info.get_bitrate(),
                'depth': video_info.get_depth(),
                'videorate': [video_info.get_framerate_num(),
                              video_info.get_framerate_denom()],
                'tags': get_tags(video_info)
            })

    stored_metadata['common'] = {
        'duration': metadata.get_duration(),
        'tags': get_tags(metadata),
    }

    # Only save this field if there's something to save
    if len(stored_metadata):
        media_entry.media_data_init(orig_metadata=stored_metadata)
# NOTE(review): the task decorator is required — VideoProcessingManager
# .workflow() builds signatures via main_task.signature(...).
@celery.task()
def main_task(entry_id, resolution, medium_size, **process_info):
    """
    Main celery task to transcode the video to the default resolution
    and store original video metadata.
    """
    _log.debug('MediaEntry processing')
    entry, manager = get_entry_and_processing_manager(entry_id)
    with CommonVideoProcessor(manager, entry) as processor:
        processor.common_setup(resolution)
        processor.transcode(medium_size=tuple(medium_size),
                            vp8_quality=process_info['vp8_quality'],
                            vp8_threads=process_info['vp8_threads'],
                            vorbis_quality=process_info['vorbis_quality'])
        processor.generate_thumb(thumb_size=process_info['thumb_size'])
        processor.store_orig_metadata()
    # Make state of entry as processed
    entry.state = u'processed'
    entry.save()
    _log.info(u'MediaEntry ID {0} is processed (transcoded to default'
              ' resolution): {1}'.format(entry.id, medium_size))
    _log.debug('MediaEntry processed')
# NOTE(review): decorator required — workflow() calls
# complementary_task.signature(...).
@celery.task()
def complementary_task(entry_id, resolution, medium_size, **process_info):
    """
    Side celery task to transcode the video to other resolutions
    """
    entry, manager = get_entry_and_processing_manager(entry_id)
    with CommonVideoProcessor(manager, entry) as processor:
        processor.common_setup(resolution)
        processor.transcode(medium_size=tuple(medium_size),
                            vp8_quality=process_info['vp8_quality'],
                            vp8_threads=process_info['vp8_threads'],
                            vorbis_quality=process_info['vorbis_quality'])
    _log.info(u'MediaEntry ID {0} is transcoded to {1}'.format(
        entry.id, medium_size))
# NOTE(review): decorator required — workflow() calls
# processing_cleanup.signature(...).
@celery.task()
def processing_cleanup(entry_id):
    """Final workflow step: keep/copy originals and drop the queue file."""
    _log.debug('Entered processing_cleanup')
    entry, manager = get_entry_and_processing_manager(entry_id)
    with CommonVideoProcessor(manager, entry) as processor:
        # no need to specify a resolution here
        processor.common_setup()
        processor.copy_original()
        processor.keep_best()
        processor.delete_queue_file()
        _log.debug('Deleted queue_file')
class CommonVideoProcessor(MediaProcessor):
    """
    Provides a base for various video processing steps
    """
    # media_files keys we accept as a processing source, best first.
    acceptable_files = ['original, best_quality', 'webm_144p', 'webm_360p',
                        'webm_480p', 'webm_720p', 'webm_1080p', 'webm_video']

    def common_setup(self, resolution=None):
        """Load plugin config, fetch the working file and set up the
        transcoder.  ``resolution`` picks the media_files key and output
        filename; without it the legacy single-file 'webm_video' key is used.
        """
        self.video_config = mgg \
            .global_config['plugins'][MEDIA_TYPE]

        # Pull down and set up the processing file
        self.process_filename = get_process_filename(
            self.entry, self.workbench, self.acceptable_files)
        self.name_builder = FilenameBuilder(self.process_filename)

        self.transcoder = transcoders.VideoTranscoder()
        self.did_transcode = False

        if resolution:
            self.curr_file = 'webm_' + str(resolution)
            self.part_filename = (self.name_builder.fill('{basename}.' +
                                  str(resolution) + '.webm'))
        else:
            self.curr_file = 'webm_video'
            self.part_filename = self.name_builder.fill('{basename}.medium.webm')

    def copy_original(self):
        # If we didn't transcode, then we need to keep the original
        self.did_transcode = False
        for each_res in self.video_config['available_resolutions']:
            if 'webm_{}'.format(each_res) in self.entry.media_files:
                self.did_transcode = True
                break
        if not self.did_transcode or self.video_config['keep_original']:
            # Module-level helper (mediagoblin.processing.copy_original),
            # not this method.
            copy_original(
                self.entry, self.process_filename,
                self.name_builder.fill('{basename}{ext}'))

    def keep_best(self):
        """
        If there is no original, keep the best file that we have
        """
        best_file = None
        best_file_dim = (0, 0)
        # Pick the configured resolution with the largest dimensions.
        for each_res in self.video_config['available_resolutions']:
            curr_dim = ACCEPTED_RESOLUTIONS[each_res]
            if curr_dim[0] >= best_file_dim[0] and curr_dim[1] >= best_file_dim[1]:
                best_file = each_res
                best_file_dim = curr_dim
        if not self.entry.media_files.get('best_quality'):
            # Save the best quality file if no original?
            if not self.entry.media_files.get('original') and \
                    self.entry.media_files.get(str(best_file)):
                self.entry.media_files['best_quality'] = self.entry \
                    .media_files[str(best_file)]

    def _skip_processing(self, keyname, **kwargs):
        """Return True if ``keyname`` was already produced with the same
        parameters (kwargs) recorded in its file metadata."""
        file_metadata = self.entry.get_file_metadata(keyname)

        if not file_metadata:
            return False
        skip = True

        if 'webm' in keyname:
            if kwargs.get('medium_size') != file_metadata.get('medium_size'):
                skip = False
            elif kwargs.get('vp8_quality') != file_metadata.get('vp8_quality'):
                skip = False
            elif kwargs.get('vp8_threads') != file_metadata.get('vp8_threads'):
                skip = False
            elif kwargs.get('vorbis_quality') != \
                    file_metadata.get('vorbis_quality'):
                skip = False
        elif keyname == 'thumb':
            if kwargs.get('thumb_size') != file_metadata.get('thumb_size'):
                skip = False

        return skip

    def transcode(self, medium_size=None, vp8_quality=None, vp8_threads=None,
                  vorbis_quality=None):
        """Transcode the working file to WebM/Vorbis at ``medium_size``,
        store it under self.curr_file and record the parameters used.
        Unset parameters fall back to plugin / global config."""
        progress_callback = ProgressCallback(self.entry)
        tmp_dst = os.path.join(self.workbench.dir, self.part_filename)

        if not medium_size:
            medium_size = (
                mgg.global_config['media:medium']['max_width'],
                mgg.global_config['media:medium']['max_height'])
        if not vp8_quality:
            vp8_quality = self.video_config['vp8_quality']
        if not vp8_threads:
            vp8_threads = self.video_config['vp8_threads']
        if not vorbis_quality:
            vorbis_quality = self.video_config['vorbis_quality']

        file_metadata = {'medium_size': medium_size,
                         'vp8_threads': vp8_threads,
                         'vp8_quality': vp8_quality,
                         'vorbis_quality': vorbis_quality}

        if self._skip_processing(self.curr_file, **file_metadata):
            return

        metadata = transcoders.discover(self.process_filename)
        orig_dst_dimensions = (metadata.get_video_streams()[0].get_width(),
                               metadata.get_video_streams()[0].get_height())

        # Figure out whether or not we need to transcode this video or
        # if we can skip it
        if skip_transcode(metadata, medium_size):
            _log.debug('Skipping transcoding')

            # If there is an original and transcoded, delete the transcoded
            # since it must be of lower quality then the original
            if self.entry.media_files.get('original') and \
                    self.entry.media_files.get(self.curr_file):
                self.entry.media_files[self.curr_file].delete()

        else:
            _log.debug('Entered transcoder')
            video_config = (mgg.global_config['plugins']
                            ['mediagoblin.media_types.video'])
            num_res = len(video_config['available_resolutions'])
            default_res = video_config['default_resolution']
            self.transcoder.transcode(self.process_filename, tmp_dst,
                                      default_res, num_res,
                                      vp8_quality=vp8_quality,
                                      vp8_threads=vp8_threads,
                                      vorbis_quality=vorbis_quality,
                                      progress_callback=progress_callback,
                                      dimensions=tuple(medium_size))
            if self.transcoder.dst_data:
                # Push transcoded video to public storage
                _log.debug('Saving medium...')
                store_public(self.entry, self.curr_file, tmp_dst, self.part_filename)
                _log.debug('Saved medium')

                self.entry.set_file_metadata(self.curr_file, **file_metadata)

                self.did_transcode = True

    def generate_thumb(self, thumb_size=None):
        """Capture a thumbnail frame; only the width of ``thumb_size`` is
        used so the aspect ratio is preserved."""
        _log.debug("Enter generate_thumb()")
        # Temporary file for the video thumbnail (cleaned up with workbench)
        tmp_thumb = os.path.join(self.workbench.dir,
                                 self.name_builder.fill(
                                     '{basename}.thumbnail.jpg'))

        if not thumb_size:
            thumb_size = (mgg.global_config['media:thumb']['max_width'],)

        if self._skip_processing('thumb', thumb_size=thumb_size):
            return

        # We will only use the width so that the correct scale is kept
        transcoders.capture_thumb(
            self.process_filename,
            tmp_thumb,
            thumb_size[0])

        # Checking if the thumbnail was correctly created.  If it was not,
        # stop here.
        if not os.path.exists (tmp_thumb):
            return

        # Push the thumbnail to public storage
        _log.debug('Saving thumbnail...')
        store_public(self.entry, 'thumb', tmp_thumb,
                     self.name_builder.fill('{basename}.thumbnail.jpg'))

        self.entry.set_file_metadata('thumb', thumb_size=thumb_size)

    def store_orig_metadata(self):
        # Extract metadata and keep a record of it
        metadata = transcoders.discover(self.process_filename)

        # metadata's stream info here is a DiscovererContainerInfo instance,
        # it gets split into DiscovererAudioInfo and DiscovererVideoInfo;
        # metadata itself has container-related data in tags, like video-codec
        store_metadata(self.entry, metadata)
        _log.debug("Stored original video metadata")
class InitialProcessor(CommonVideoProcessor):
    """
    Initial processing steps for new video
    """
    name = "initial"
    description = "Initial processing"

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        if not state:
            state = entry.state
        return state in (
            "unprocessed", "failed")

    @classmethod
    def generate_parser(cls):
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--medium_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        parser.add_argument(
            '--vp8_quality',
            type=int,
            help='Range 0..10')

        parser.add_argument(
            '--vp8_threads',
            type=int,
            help='0 means number_of_CPUs - 1')

        parser.add_argument(
            '--vorbis_quality',
            type=float,
            help='Range -0.1..1')

        parser.add_argument(
            '--thumb_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        return parser

    @classmethod
    def args_to_request(cls, args):
        return request_from_args(
            args, ['medium_size', 'vp8_quality', 'vp8_threads',
                   'vorbis_quality', 'thumb_size'])

    def process(self, medium_size=None, vp8_threads=None, vp8_quality=None,
                vorbis_quality=None, thumb_size=None, resolution=None):
        self.common_setup(resolution=resolution)
        self.store_orig_metadata()
        self.transcode(medium_size=medium_size, vp8_quality=vp8_quality,
                       vp8_threads=vp8_threads, vorbis_quality=vorbis_quality)

        self.generate_thumb(thumb_size=thumb_size)
        self.delete_queue_file()
class Resizer(CommonVideoProcessor):
    """
    Video thumbnail resizing process steps for processed media
    """
    name = 'resize'
    description = 'Resize thumbnail'
    thumb_size = 'thumb_size'

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        if not state:
            state = entry.state
        return state in 'processed'

    @classmethod
    def generate_parser(cls):
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--thumb_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        # Needed for gmg reprocess thumbs to work
        parser.add_argument(
            'file',
            nargs='?',
            default='thumb',
            choices=['thumb'])

        return parser

    @classmethod
    def args_to_request(cls, args):
        return request_from_args(
            args, ['thumb_size', 'file'])

    def process(self, thumb_size=None, file=None):
        self.common_setup()
        self.generate_thumb(thumb_size=thumb_size)
class Transcoder(CommonVideoProcessor):
    """
    Transcoding processing steps for processed video
    """
    name = 'transcode'
    description = 'Re-transcode video'

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        if not state:
            state = entry.state
        return state in 'processed'

    @classmethod
    def generate_parser(cls):
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--medium_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        parser.add_argument(
            '--vp8_quality',
            type=int,
            help='Range 0..10')

        parser.add_argument(
            '--vp8_threads',
            type=int,
            help='0 means number_of_CPUs - 1')

        parser.add_argument(
            '--vorbis_quality',
            type=float,
            help='Range -0.1..1')

        return parser

    @classmethod
    def args_to_request(cls, args):
        return request_from_args(
            args, ['medium_size', 'vp8_threads', 'vp8_quality',
                   'vorbis_quality'])

    def process(self, medium_size=None, vp8_quality=None, vp8_threads=None,
                vorbis_quality=None):
        self.common_setup()
        self.transcode(medium_size=medium_size, vp8_threads=vp8_threads,
                       vp8_quality=vp8_quality, vorbis_quality=vorbis_quality)
class VideoProcessingManager(ProcessingManager):
    """Registers the video processors and builds the celery workflow."""

    def __init__(self):
        super(VideoProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)
        self.add_processor(Resizer)
        self.add_processor(Transcoder)

    def workflow(self, entry, feed_url, reprocess_action, reprocess_info=None):
        """Return (transcoding group, cleanup signature) for this entry.

        The default resolution gets the highest priority; each extra
        resolution runs at a progressively lower priority, and the cleanup
        task runs after the group.
        """
        video_config = mgg.global_config['plugins'][MEDIA_TYPE]
        def_res = video_config['default_resolution']
        priority_num = len(video_config['available_resolutions']) + 1

        entry.state = u'processing'
        entry.save()

        # Normalise reprocess_info so the tasks can rely on every key
        # being present (None means "use configured default").
        reprocess_info = reprocess_info or {}
        if 'vp8_quality' not in reprocess_info:
            reprocess_info['vp8_quality'] = None
        if 'vorbis_quality' not in reprocess_info:
            reprocess_info['vorbis_quality'] = None
        if 'vp8_threads' not in reprocess_info:
            reprocess_info['vp8_threads'] = None
        if 'thumb_size' not in reprocess_info:
            reprocess_info['thumb_size'] = None

        tasks_list = [main_task.signature(args=(entry.id, def_res,
                                                ACCEPTED_RESOLUTIONS[def_res]),
                                          kwargs=reprocess_info, queue='default',
                                          priority=priority_num, immutable=True)]

        for comp_res in video_config['available_resolutions']:
            if comp_res != def_res:
                priority_num += -1
                tasks_list.append(
                    complementary_task.signature(args=(entry.id, comp_res,
                                                       ACCEPTED_RESOLUTIONS[comp_res]),
                                                 kwargs=reprocess_info, queue='default',
                                                 priority=priority_num, immutable=True)
                )

        transcoding_tasks = group(tasks_list)
        cleanup_task = processing_cleanup.signature(args=(entry.id,),
                                                    queue='default', immutable=True)

        return (transcoding_tasks, cleanup_task)