Fix saving original of the video
[mediagoblin.git] / mediagoblin / media_types / video / processing.py
... / ...
CommitLineData
1# GNU MediaGoblin -- federated, autonomous media hosting
2# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17import argparse
18import os.path
19import logging
20import datetime
21import celery
22
23import six
24
25from celery import group
26from mediagoblin import mg_globals as mgg
27from mediagoblin.processing import (
28 FilenameBuilder, BaseProcessingFail,
29 ProgressCallback, MediaProcessor,
30 ProcessingManager, request_from_args,
31 get_process_filename, store_public,
32 copy_original, get_entry_and_processing_manager)
33from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
34from mediagoblin.media_types import MissingComponents
35
36from . import transcoders
37from .util import skip_transcode, ACCEPTED_RESOLUTIONS
38
# Module-level logger; DEBUG level so transcoding progress and GStreamer
# discovery details reach the logs.
_log = logging.getLogger(__name__)
_log.setLevel(logging.DEBUG)

# Media-type identifier under which this plugin is registered/configured.
MEDIA_TYPE = 'mediagoblin.media_types.video'
44
class VideoTranscodingFail(BaseProcessingFail):
    """Processing failure raised when transcoding a video does not succeed."""
    general_message = _(u'Video transcoding failed')
50
51
def sniffer(media_file):
    """New style sniffer, used in two-steps check; requires to have .name.

    Runs GStreamer discovery over the file behind ``media_file.name``.

    :param media_file: file-like object with a ``.name`` attribute pointing
        at the on-disk file to inspect
    :returns: MEDIA_TYPE when the file contains at least one video stream
    :raises MissingComponents: when discovery fails, no video stream is
        found, or a GStreamer plugin needed for decoding is missing
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))
    try:
        data = transcoders.discover(media_file.name)
    except Exception as e:
        # this is usually GLib.GError, but we don't really care which one
        _log.warning(u'GStreamer: {0}'.format(six.text_type(e)))
        raise MissingComponents(u'GStreamer: {0}'.format(six.text_type(e)))
    _log.debug('Discovered: {0}'.format(data))

    if not data.get_video_streams():
        raise MissingComponents('No video streams found in this video')

    if data.get_result() != 0:  # it's 0 if success
        try:
            missing = data.get_misc().get_string('name')
            _log.warning('GStreamer: missing {0}'.format(missing))
        except AttributeError:
            # AttributeError happens here on gstreamer >1.4, when get_misc
            # returns None. There is a special function to get info about
            # missing plugin. This info should be printed to logs for admin
            # and showed to the user in a short and nice version
            details = data.get_missing_elements_installer_details()
            _log.warning('GStreamer: missing: {0}'.format(', '.join(details)))
            missing = u', '.join([u'{0} ({1})'.format(*d.split('|')[3:])
                                  for d in details])
        raise MissingComponents(u'{0} is missing'.format(missing))

    return MEDIA_TYPE
82
83
# Extensions this media type must refuse even if GStreamer claims it can
# decode them (raw photos, vector art).
EXCLUDED_EXTS = ["nef", "svg"]
85
def sniff_handler(media_file, filename):
    """Decide whether this media type can handle ``media_file``.

    :param media_file: file-like object with a ``.name`` attribute
    :param filename: original upload filename, used for extension checks
    :returns: MEDIA_TYPE if the file is a usable video, else None
    """
    clean_ext = os.path.splitext(filename)[1].lower()[1:]

    if clean_ext in EXCLUDED_EXTS:
        # We don't handle this filetype, though gstreamer might think we can
        _log.info('Refused to process {0} due to excluded extension'.format(filename))
        return None

    try:
        return sniffer(media_file)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are not swallowed; any discovery failure simply means "not ours".
        _log.error('Could not discover {0}'.format(filename))
        return None
100
def get_tags(stream_info):
    """Return all tags and their values from ``stream_info`` as a dict.

    Non-JSON-serializable values are dropped, and GDate/GDateTime values
    are converted to strings so the result can be stored as media_data.

    :param stream_info: GstPbutils stream info object exposing ``get_tags()``
    :returns: dict of tag name -> JSON-safe value (empty if no tags)
    """
    taglist = stream_info.get_tags()
    if not taglist:
        return {}
    tags = []
    taglist.foreach(
        lambda list, tag: tags.append((tag, list.get_value_index(tag, 0))))
    tags = dict(tags)

    # date/datetime should be converted from GDate/GDateTime to strings
    if 'date' in tags:
        date = tags['date']
        tags['date'] = "%s-%s-%s" % (
            date.year, date.month, date.day)

    if 'datetime' in tags:
        # TODO: handle timezone info; gst.get_time_zone_offset +
        # python's tzinfo should help
        dt = tags['datetime']
        try:
            tags['datetime'] = datetime.datetime(
                dt.get_year(), dt.get_month(), dt.get_day(), dt.get_hour(),
                dt.get_minute(), dt.get_second(),
                dt.get_microsecond()).isoformat()
        except Exception:
            # Narrowed from a bare ``except:``; GDateTime may carry
            # out-of-range fields, in which case the timestamp is unknown.
            tags['datetime'] = None
    for k, v in tags.copy().items():
        # types below are accepted by json; others must not present
        if not isinstance(v, (dict, list, six.string_types, int, float, bool,
                              type(None))):
            del tags[k]
    return dict(tags)
134
def store_metadata(media_entry, metadata):
    """
    Store metadata from this video for this media entry.
    """
    stored_metadata = {}

    audio_streams = metadata.get_audio_streams()
    if audio_streams:
        # NOTE(review): 'languange' is misspelled but kept as-is -- it is the
        # key already present in stored media_data; renaming it would change
        # the stored schema.
        stored_metadata['audio'] = [
            {
                'channels': stream.get_channels(),
                'bitrate': stream.get_bitrate(),
                'depth': stream.get_depth(),
                'languange': stream.get_language(),
                'sample_rate': stream.get_sample_rate(),
                'tags': get_tags(stream)
            }
            for stream in audio_streams]

    video_streams = metadata.get_video_streams()
    if video_streams:
        stored_metadata['video'] = [
            {
                'width': stream.get_width(),
                'height': stream.get_height(),
                'bitrate': stream.get_bitrate(),
                'depth': stream.get_depth(),
                'videorate': [stream.get_framerate_num(),
                              stream.get_framerate_denom()],
                'tags': get_tags(stream)
            }
            for stream in video_streams]

    stored_metadata['common'] = {
        'duration': metadata.get_duration(),
        'tags': get_tags(metadata),
    }

    # Only save this field if there's something to save
    if len(stored_metadata):
        media_entry.media_data_init(orig_metadata=stored_metadata)
176
177
@celery.task()
def main_task(entry_id, resolution, medium_size, **process_info):
    """
    Main celery task to transcode the video to the default resolution
    and store original video metadata.

    :param entry_id: primary key of the MediaEntry to process
    :param resolution: label of the default resolution to produce
    :param medium_size: (width, height) pair for the transcode target
    :param process_info: vp8_quality, vp8_threads, vorbis_quality and
        thumb_size settings forwarded to the processor
    """
    _log.debug('MediaEntry processing')
    entry, manager = get_entry_and_processing_manager(entry_id)
    with CommonVideoProcessor(manager, entry) as processor:
        processor.common_setup(resolution)
        processor.transcode(medium_size=tuple(medium_size),
                            vp8_quality=process_info['vp8_quality'],
                            vp8_threads=process_info['vp8_threads'],
                            vorbis_quality=process_info['vorbis_quality'])
        processor.generate_thumb(thumb_size=process_info['thumb_size'])
        processor.store_orig_metadata()
    # Make state of entry as processed
    entry.state = u'processed'
    entry.save()
    _log.info(u'MediaEntry ID {0} is processed (transcoded to default'
              ' resolution): {1}'.format(entry.id, medium_size))
    _log.debug('MediaEntry processed')
200
201
@celery.task()
def complementary_task(entry_id, resolution, medium_size, **process_info):
    """
    Side celery task to transcode the video to other resolutions

    :param entry_id: primary key of the MediaEntry to process
    :param resolution: label of this (non-default) resolution
    :param medium_size: (width, height) pair for this resolution
    :param process_info: vp8/vorbis quality settings for transcode()
    """
    entry, manager = get_entry_and_processing_manager(entry_id)
    with CommonVideoProcessor(manager, entry) as processor:
        processor.common_setup(resolution)
        processor.transcode(medium_size=tuple(medium_size),
                            vp8_quality=process_info['vp8_quality'],
                            vp8_threads=process_info['vp8_threads'],
                            vorbis_quality=process_info['vorbis_quality'])
    _log.info(u'MediaEntry ID {0} is transcoded to {1}'.format(
        entry.id, medium_size))
216
217
@celery.task()
def processing_cleanup(entry_id):
    """
    Final celery task, run after all transcoding tasks finished: keep the
    original (or the best transcode) and remove the queue file.
    """
    _log.debug('Entered processing_cleanup')
    entry, manager = get_entry_and_processing_manager(entry_id)
    with CommonVideoProcessor(manager, entry) as processor:
        # no need to specify a resolution here
        processor.common_setup()
        processor.copy_original()
        processor.keep_best()
        processor.delete_queue_file()
        _log.debug('Deleted queue_file')
229
230
class CommonVideoProcessor(MediaProcessor):
    """
    Provides a base for various video processing steps
    """
    # media_files keys that may serve as the source file for processing.
    # 'original' and 'best_quality' must be two separate entries: they were
    # previously fused into the single string 'original, best_quality',
    # which could match no media_files key, so the stored original could
    # never be found and re-used.
    acceptable_files = ['original', 'best_quality', 'webm_144p', 'webm_360p',
                        'webm_480p', 'webm_720p', 'webm_1080p', 'webm_video']

    def common_setup(self, resolution=None):
        """
        Pull the file to process into the workbench and prepare the
        transcoder.

        :param resolution: optional resolution label; selects the
            media_files key ('webm_<resolution>') and the output filename.
            Without it, the legacy 'webm_video' key is used.
        """
        self.video_config = mgg \
            .global_config['plugins'][MEDIA_TYPE]

        # Pull down and set up the processing file
        self.process_filename = get_process_filename(
            self.entry, self.workbench, self.acceptable_files)
        self.name_builder = FilenameBuilder(self.process_filename)

        self.transcoder = transcoders.VideoTranscoder()
        self.did_transcode = False

        if resolution:
            self.curr_file = 'webm_' + str(resolution)
            self.part_filename = (self.name_builder.fill('{basename}.' +
                                  str(resolution) + '.webm'))
        else:
            self.curr_file = 'webm_video'
            self.part_filename = self.name_builder.fill(
                '{basename}.medium.webm')

    def copy_original(self):
        """
        Copy the original file to public storage when no transcode was made
        at all, or when the admin configured 'keep_original'.
        """
        # If we didn't transcode, then we need to keep the original
        self.did_transcode = False
        for each_res in self.video_config['available_resolutions']:
            if 'webm_{}'.format(each_res) in self.entry.media_files:
                self.did_transcode = True
                break
        if not self.did_transcode or self.video_config['keep_original']:
            copy_original(
                self.entry, self.process_filename,
                self.name_builder.fill('{basename}{ext}'))
        self.entry.save()

    def keep_best(self):
        """
        If there is no original, keep the best file that we have
        """
        # Pick the largest configured resolution (compared per dimension).
        best_file = None
        best_file_dim = (0, 0)
        for each_res in self.video_config['available_resolutions']:
            curr_dim = ACCEPTED_RESOLUTIONS[each_res]
            if curr_dim[0] >= best_file_dim[0] and curr_dim[1] >= best_file_dim[1]:
                best_file = each_res
                best_file_dim = curr_dim
        if not self.entry.media_files.get('best_quality'):
            # Save the best quality file if no original?
            # NOTE(review): media_files keys look like 'webm_<res>' elsewhere
            # while best_file is the bare resolution label -- confirm this
            # lookup can actually match.
            if not self.entry.media_files.get('original') and \
                    self.entry.media_files.get(str(best_file)):
                self.entry.media_files['best_quality'] = self.entry \
                    .media_files[str(best_file)]

    def _skip_processing(self, keyname, **kwargs):
        """
        Return True when the file stored under ``keyname`` was already
        produced with exactly the same parameters, so the step may be
        skipped.
        """
        file_metadata = self.entry.get_file_metadata(keyname)

        if not file_metadata:
            return False
        skip = True

        if 'webm' in keyname:
            # Any changed transcoding parameter invalidates the stored file.
            if kwargs.get('medium_size') != file_metadata.get('medium_size'):
                skip = False
            elif kwargs.get('vp8_quality') != file_metadata.get('vp8_quality'):
                skip = False
            elif kwargs.get('vp8_threads') != file_metadata.get('vp8_threads'):
                skip = False
            elif kwargs.get('vorbis_quality') != \
                    file_metadata.get('vorbis_quality'):
                skip = False
        elif keyname == 'thumb':
            if kwargs.get('thumb_size') != file_metadata.get('thumb_size'):
                skip = False

        return skip

    def transcode(self, medium_size=None, vp8_quality=None, vp8_threads=None,
                  vorbis_quality=None):
        """
        Transcode the source video to WebM at ``medium_size``.

        Parameters left as None fall back to the plugin configuration.
        The step is skipped when an up-to-date transcode already exists or
        when the source is already acceptable (skip_transcode).
        """
        progress_callback = ProgressCallback(self.entry)
        tmp_dst = os.path.join(self.workbench.dir, self.part_filename)

        if not medium_size:
            medium_size = (
                mgg.global_config['media:medium']['max_width'],
                mgg.global_config['media:medium']['max_height'])
        if not vp8_quality:
            vp8_quality = self.video_config['vp8_quality']
        if not vp8_threads:
            vp8_threads = self.video_config['vp8_threads']
        if not vorbis_quality:
            vorbis_quality = self.video_config['vorbis_quality']

        file_metadata = {'medium_size': medium_size,
                         'vp8_threads': vp8_threads,
                         'vp8_quality': vp8_quality,
                         'vorbis_quality': vorbis_quality}

        if self._skip_processing(self.curr_file, **file_metadata):
            return

        metadata = transcoders.discover(self.process_filename)

        # Figure out whether or not we need to transcode this video or
        # if we can skip it
        if skip_transcode(metadata, medium_size):
            _log.debug('Skipping transcoding')

            # If there is an original and transcoded, delete the transcoded
            # since it must be of lower quality then the original
            if self.entry.media_files.get('original') and \
                    self.entry.media_files.get(self.curr_file):
                self.entry.media_files[self.curr_file].delete()

        else:
            _log.debug('Entered transcoder')
            # self.video_config was fetched in common_setup(); no need to
            # look the plugin configuration up a second time here.
            num_res = len(self.video_config['available_resolutions'])
            default_res = self.video_config['default_resolution']
            self.transcoder.transcode(self.process_filename, tmp_dst,
                                      default_res, num_res,
                                      vp8_quality=vp8_quality,
                                      vp8_threads=vp8_threads,
                                      vorbis_quality=vorbis_quality,
                                      progress_callback=progress_callback,
                                      dimensions=tuple(medium_size))
            if self.transcoder.dst_data:
                # Push transcoded video to public storage
                _log.debug('Saving medium...')
                store_public(self.entry, self.curr_file, tmp_dst,
                             self.part_filename)
                _log.debug('Saved medium')

                self.entry.set_file_metadata(self.curr_file, **file_metadata)

                self.did_transcode = True

    def generate_thumb(self, thumb_size=None):
        """
        Capture a JPEG thumbnail of the video and push it to public storage.

        :param thumb_size: optional tuple; only thumb_size[0] (max width)
            is used so the aspect ratio is preserved.
        """
        _log.debug("Enter generate_thumb()")
        # Temporary file for the video thumbnail (cleaned up with workbench)
        tmp_thumb = os.path.join(self.workbench.dir,
                                 self.name_builder.fill(
                                     '{basename}.thumbnail.jpg'))

        if not thumb_size:
            thumb_size = (mgg.global_config['media:thumb']['max_width'],)

        if self._skip_processing('thumb', thumb_size=thumb_size):
            return

        # We will only use the width so that the correct scale is kept
        transcoders.capture_thumb(
            self.process_filename,
            tmp_thumb,
            thumb_size[0])

        # Checking if the thumbnail was correctly created. If it was not,
        # then just give up.
        if not os.path.exists(tmp_thumb):
            return

        # Push the thumbnail to public storage
        _log.debug('Saving thumbnail...')
        store_public(self.entry, 'thumb', tmp_thumb,
                     self.name_builder.fill('{basename}.thumbnail.jpg'))

        self.entry.set_file_metadata('thumb', thumb_size=thumb_size)

    def store_orig_metadata(self):
        """
        Extract metadata from the original video and record it on the entry.
        """
        metadata = transcoders.discover(self.process_filename)

        # metadata's stream info here is a DiscovererContainerInfo instance,
        # it gets split into DiscovererAudioInfo and DiscovererVideoInfo;
        # metadata itself has container-related data in tags, like video-codec
        store_metadata(self.entry, metadata)
        _log.debug("Stored original video metadata")
418
419
class InitialProcessor(CommonVideoProcessor):
    """
    Initial processing steps for new video
    """
    name = "initial"
    description = "Initial processing"

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """Fresh uploads and previously failed entries are eligible."""
        return (state or entry.state) in ("unprocessed", "failed")

    @classmethod
    def generate_parser(cls):
        """Build the CLI parser accepting all initial-processing options."""
        parser = argparse.ArgumentParser(
            description=cls.description, prog=cls.name)
        parser.add_argument(
            '--medium_size', nargs=2,
            metavar=('max_width', 'max_height'), type=int)
        parser.add_argument(
            '--vp8_quality', type=int, help='Range 0..10')
        parser.add_argument(
            '--vp8_threads', type=int, help='0 means number_of_CPUs - 1')
        parser.add_argument(
            '--vorbis_quality', type=float, help='Range -0.1..1')
        parser.add_argument(
            '--thumb_size', nargs=2,
            metavar=('max_width', 'max_height'), type=int)
        return parser

    @classmethod
    def args_to_request(cls, args):
        """Collect the supported CLI arguments into a request dict."""
        return request_from_args(
            args, ['medium_size', 'vp8_quality', 'vp8_threads',
                   'vorbis_quality', 'thumb_size'])

    def process(self, medium_size=None, vp8_threads=None, vp8_quality=None,
                vorbis_quality=None, thumb_size=None, resolution=None):
        """Run the full initial pipeline: metadata, transcode, thumbnail."""
        self.common_setup(resolution=resolution)
        self.store_orig_metadata()
        self.transcode(medium_size=medium_size, vp8_quality=vp8_quality,
                       vp8_threads=vp8_threads, vorbis_quality=vorbis_quality)
        self.generate_thumb(thumb_size=thumb_size)
        self.delete_queue_file()
484
485
class Resizer(CommonVideoProcessor):
    """
    Video thumbnail resizing process steps for processed media
    """
    name = 'resize'
    description = 'Resize thumbnail'
    thumb_size = 'thumb_size'

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """Only fully processed entries may have their thumbnail resized.

        The previous check ``state in 'processed'`` was a substring test,
        so e.g. '' and 'process' were wrongly accepted; compare for
        equality instead.
        """
        if not state:
            state = entry.state
        return state == u'processed'

    @classmethod
    def generate_parser(cls):
        """Build the CLI parser for thumbnail resizing."""
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--thumb_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        # Needed for gmg reprocess thumbs to work
        parser.add_argument(
            'file',
            nargs='?',
            default='thumb',
            choices=['thumb'])

        return parser

    @classmethod
    def args_to_request(cls, args):
        """Collect thumb_size and file into a request dict."""
        return request_from_args(
            args, ['thumb_size', 'file'])

    def process(self, thumb_size=None, file=None):
        """Regenerate the thumbnail; ``file`` is accepted for CLI parity."""
        self.common_setup()
        self.generate_thumb(thumb_size=thumb_size)
529
530
class Transcoder(CommonVideoProcessor):
    """
    Transcoding processing steps for processed video
    """
    name = 'transcode'
    description = 'Re-transcode video'

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """Only fully processed entries may be re-transcoded.

        The previous check ``state in 'processed'`` was a substring test,
        so e.g. '' and 'process' were wrongly accepted; compare for
        equality instead.
        """
        if not state:
            state = entry.state
        return state == u'processed'

    @classmethod
    def generate_parser(cls):
        """Build the CLI parser for re-transcoding options."""
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--medium_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        parser.add_argument(
            '--vp8_quality',
            type=int,
            help='Range 0..10')

        parser.add_argument(
            '--vp8_threads',
            type=int,
            help='0 means number_of_CPUs - 1')

        parser.add_argument(
            '--vorbis_quality',
            type=float,
            help='Range -0.1..1')

        return parser

    @classmethod
    def args_to_request(cls, args):
        """Collect the transcode-related CLI arguments into a request dict."""
        return request_from_args(
            args, ['medium_size', 'vp8_threads', 'vp8_quality',
                   'vorbis_quality'])

    def process(self, medium_size=None, vp8_quality=None, vp8_threads=None,
                vorbis_quality=None):
        """Re-run the transcode step with the given overrides."""
        self.common_setup()
        self.transcode(medium_size=medium_size, vp8_threads=vp8_threads,
                       vp8_quality=vp8_quality, vorbis_quality=vorbis_quality)
584
585
class VideoProcessingManager(ProcessingManager):
    """Registers the video processors and builds the celery workflow."""

    def __init__(self):
        super(VideoProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)
        self.add_processor(Resizer)
        self.add_processor(Transcoder)

    def workflow(self, entry, feed_url, reprocess_action, reprocess_info=None):
        """
        Build the transcoding task group plus the cleanup task for ``entry``.

        The default resolution gets the highest priority; each additional
        resolution runs at a lower priority, and cleanup runs afterwards.
        Returns a (transcoding_tasks, cleanup_task) tuple.
        """
        video_config = mgg.global_config['plugins'][MEDIA_TYPE]
        def_res = video_config['default_resolution']
        priority_num = len(video_config['available_resolutions']) + 1

        entry.state = u'processing'
        entry.save()

        # Fill in every setting the tasks expect, defaulting to None.
        reprocess_info = reprocess_info or {}
        for key in ('vp8_quality', 'vorbis_quality',
                    'vp8_threads', 'thumb_size'):
            reprocess_info.setdefault(key, None)

        tasks_list = [main_task.signature(args=(entry.id, def_res,
                                                ACCEPTED_RESOLUTIONS[def_res]),
                                          kwargs=reprocess_info, queue='default',
                                          priority=priority_num, immutable=True)]

        for comp_res in video_config['available_resolutions']:
            if comp_res != def_res:
                priority_num -= 1
                tasks_list.append(
                    complementary_task.signature(args=(entry.id, comp_res,
                                                       ACCEPTED_RESOLUTIONS[comp_res]),
                                                 kwargs=reprocess_info,
                                                 queue='default',
                                                 priority=priority_num,
                                                 immutable=True)
                )

        transcoding_tasks = group(tasks_list)
        cleanup_task = processing_cleanup.signature(args=(entry.id,),
                                                    queue='default',
                                                    immutable=True)

        return (transcoding_tasks, cleanup_task)