# Add workflow method to VideoProcessingManager
# [mediagoblin.git] / mediagoblin / media_types / video / processing.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 import argparse
18 import os.path
19 import logging
20 import datetime
21
22 import six
23
24 from mediagoblin import mg_globals as mgg
25 from mediagoblin.processing import (
26 FilenameBuilder, BaseProcessingFail,
27 ProgressCallback, MediaProcessor,
28 ProcessingManager, request_from_args,
29 get_process_filename, store_public,
30 copy_original)
31 from mediagoblin.processing.task import ProcessMedia
32 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
33 from mediagoblin.media_types import MissingComponents
34
35 from . import transcoders
36 from .util import skip_transcode
37
38 _log = logging.getLogger(__name__)
39 _log.setLevel(logging.DEBUG)
40
41 MEDIA_TYPE = 'mediagoblin.media_types.video'
42
43
class VideoTranscodingFail(BaseProcessingFail):
    '''
    Error raised if video transcoding fails
    '''
    # Translated, user-facing message recorded on the entry when this
    # failure is raised.
    general_message = _(u'Video transcoding failed')
49
50
def sniffer(media_file):
    """New-style sniffer, used in the two-step media type check.

    ``media_file`` must expose a ``.name`` attribute pointing at the file
    on disk.  Returns MEDIA_TYPE on success, raises MissingComponents when
    the file cannot be handled as video.
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))
    try:
        data = transcoders.discover(media_file.name)
    except Exception as exc:
        # This is usually GLib.GError, but we don't really care which one.
        _log.warning(u'GStreamer: {0}'.format(six.text_type(exc)))
        raise MissingComponents(u'GStreamer: {0}'.format(six.text_type(exc)))
    _log.debug('Discovered: {0}'.format(data))

    if not data.get_video_streams():
        raise MissingComponents('No video streams found in this video')

    if data.get_result() != 0:  # discovery result 0 means success
        try:
            missing = data.get_misc().get_string('name')
            _log.warning('GStreamer: missing {0}'.format(missing))
        except AttributeError:
            # On gstreamer >1.4 get_misc() returns None, which triggers this
            # AttributeError.  There is a dedicated API for information about
            # missing plugins; log the full details for the admin and build a
            # short, readable summary for the user.
            details = data.get_missing_elements_installer_details()
            _log.warning('GStreamer: missing: {0}'.format(', '.join(details)))
            missing = u', '.join(
                u'{0} ({1})'.format(*detail.split('|')[3:])
                for detail in details)
        raise MissingComponents(u'{0} is missing'.format(missing))

    return MEDIA_TYPE
81
82
def sniff_handler(media_file, filename):
    """Return MEDIA_TYPE if *media_file* looks like a video, else None.

    Thin wrapper around sniffer() that turns any failure into a logged
    ``None`` result for the media-type detection machinery.
    """
    try:
        return sniffer(media_file)
    except Exception:
        # Narrowed from a bare `except:` so that KeyboardInterrupt and
        # SystemExit still propagate instead of being swallowed here.
        _log.error('Could not discover {0}'.format(filename))
        return None
89
def get_tags(stream_info):
    """Return a dict of all tags and their values from *stream_info*.

    GDate / GDateTime tag values are converted to strings; any remaining
    value that is not JSON-serializable is dropped.
    """
    taglist = stream_info.get_tags()
    if not taglist:
        return {}
    tags = []
    # Renamed the lambda parameter so it no longer shadows the builtin
    # `list`; foreach() passes the tag list itself as first argument.
    taglist.foreach(
        lambda tag_list, tag: tags.append(
            (tag, tag_list.get_value_index(tag, 0))))
    tags = dict(tags)

    # date/datetime should be converted from GDate/GDateTime to strings
    if 'date' in tags:
        date = tags['date']
        tags['date'] = "%s-%s-%s" % (
            date.year, date.month, date.day)

    if 'datetime' in tags:
        # TODO: handle timezone info; gst.get_time_zone_offset +
        # python's tzinfo should help
        dt = tags['datetime']
        tags['datetime'] = datetime.datetime(
            dt.get_year(), dt.get_month(), dt.get_day(), dt.get_hour(),
            dt.get_minute(), dt.get_second(),
            dt.get_microsecond()).isoformat()

    # Types below are accepted by json; drop anything else.  Iterate over a
    # snapshot since we delete from `tags` while looping.
    for key, value in list(tags.items()):
        if not isinstance(value, (dict, list, six.string_types, int, float,
                                  bool, type(None))):
            del tags[key]
    # `tags` is already a plain dict; the extra dict() copy was redundant.
    return tags
120
def store_metadata(media_entry, metadata):
    """
    Store metadata from this video for this media entry.
    """
    stored_metadata = dict()

    audio_streams = metadata.get_audio_streams()
    if audio_streams:
        # NOTE(review): 'languange' looks like a typo for 'language', but it
        # is kept byte-for-byte since already-stored metadata uses this key.
        stored_metadata['audio'] = [
            {
                'channels': audio_info.get_channels(),
                'bitrate': audio_info.get_bitrate(),
                'depth': audio_info.get_depth(),
                'languange': audio_info.get_language(),
                'sample_rate': audio_info.get_sample_rate(),
                'tags': get_tags(audio_info)
            }
            for audio_info in audio_streams]

    video_streams = metadata.get_video_streams()
    if video_streams:
        stored_metadata['video'] = [
            {
                'width': video_info.get_width(),
                'height': video_info.get_height(),
                'bitrate': video_info.get_bitrate(),
                'depth': video_info.get_depth(),
                'videorate': [video_info.get_framerate_num(),
                              video_info.get_framerate_denom()],
                'tags': get_tags(video_info)
            }
            for video_info in video_streams]

    stored_metadata['common'] = {
        'duration': metadata.get_duration(),
        'tags': get_tags(metadata),
    }

    # Only save this field if there's something to save ('common' is always
    # set above, so in practice this always fires).
    if len(stored_metadata):
        media_entry.media_data_init(orig_metadata=stored_metadata)
162
163
class CommonVideoProcessor(MediaProcessor):
    """
    Provides a base for various video processing steps
    """
    # Media-file keys that may serve as the processing source, passed to
    # get_process_filename() which picks the first one available.
    acceptable_files = ['original', 'best_quality', 'webm_video']

    def common_setup(self):
        """Load plugin config, fetch the working file and discover metadata."""
        self.video_config = mgg \
            .global_config['plugins'][MEDIA_TYPE]

        # Pull down and set up the processing file
        self.process_filename = get_process_filename(
            self.entry, self.workbench, self.acceptable_files)
        self.name_builder = FilenameBuilder(self.process_filename)

        self.transcoder = transcoders.VideoTranscoder()
        self.did_transcode = False

        # Extract metadata and keep a record of it
        self.metadata = transcoders.discover(self.process_filename)

        # metadata's stream info here is a DiscovererContainerInfo instance,
        # it gets split into DiscovererAudioInfo and DiscovererVideoInfo;
        # metadata itself has container-related data in tags, like video-codec
        store_metadata(self.entry, self.metadata)

    def copy_original(self):
        """Keep the original file when we didn't transcode or config says so.

        The condition is equivalent to
        `not did_transcode or keep_original` — kept verbatim here.
        """
        # If we didn't transcode, then we need to keep the original
        if not self.did_transcode or \
           (self.video_config['keep_original'] and self.did_transcode):
            copy_original(
                self.entry, self.process_filename,
                self.name_builder.fill('{basename}{ext}'))

    def _keep_best(self):
        """
        If there is no original, keep the best file that we have
        """
        if not self.entry.media_files.get('best_quality'):
            # Save the best quality file if no original?
            if not self.entry.media_files.get('original') and \
               self.entry.media_files.get('webm_video'):
                self.entry.media_files['best_quality'] = self.entry \
                    .media_files['webm_video']

    def _skip_processing(self, keyname, **kwargs):
        """Return True when the stored file metadata for *keyname* matches
        the requested parameters, meaning reprocessing can be skipped."""
        file_metadata = self.entry.get_file_metadata(keyname)

        # No record yet -> nothing to compare against, must process.
        if not file_metadata:
            return False
        skip = True

        if keyname == 'webm_video':
            # Any parameter difference invalidates the previous transcode.
            if kwargs.get('medium_size') != file_metadata.get('medium_size'):
                skip = False
            elif kwargs.get('vp8_quality') != file_metadata.get('vp8_quality'):
                skip = False
            elif kwargs.get('vp8_threads') != file_metadata.get('vp8_threads'):
                skip = False
            elif kwargs.get('vorbis_quality') != \
                    file_metadata.get('vorbis_quality'):
                skip = False
        elif keyname == 'thumb':
            if kwargs.get('thumb_size') != file_metadata.get('thumb_size'):
                skip = False

        return skip

    def transcode(self, medium_size=None, vp8_quality=None, vp8_threads=None,
                  vorbis_quality=None):
        """Transcode the working file to the medium webm, or skip when the
        source already satisfies the requested size/quality.

        Missing parameters fall back to site/plugin config.  NOTE(review):
        falsy values (e.g. 0) are treated as "not supplied" by the `if not`
        checks below — confirm 0 is never a meaningful setting here.
        """
        progress_callback = ProgressCallback(self.entry)
        tmp_dst = os.path.join(self.workbench.dir,
                               self.name_builder.fill('{basename}.medium.webm'))

        if not medium_size:
            medium_size = (
                mgg.global_config['media:medium']['max_width'],
                mgg.global_config['media:medium']['max_height'])
        if not vp8_quality:
            vp8_quality = self.video_config['vp8_quality']
        if not vp8_threads:
            vp8_threads = self.video_config['vp8_threads']
        if not vorbis_quality:
            vorbis_quality = self.video_config['vorbis_quality']

        file_metadata = {'medium_size': medium_size,
                         'vp8_threads': vp8_threads,
                         'vp8_quality': vp8_quality,
                         'vorbis_quality': vorbis_quality}

        # Identical parameters already recorded -> nothing to do.
        if self._skip_processing('webm_video', **file_metadata):
            return

        orig_dst_dimensions = (self.metadata.get_video_streams()[0].get_width(),
                               self.metadata.get_video_streams()[0].get_height())

        # Figure out whether or not we need to transcode this video or
        # if we can skip it
        if skip_transcode(self.metadata, medium_size):
            _log.debug('Skipping transcoding')

            dst_dimensions = orig_dst_dimensions

            # If there is an original and transcoded, delete the transcoded
            # since it must be of lower quality then the original
            if self.entry.media_files.get('original') and \
               self.entry.media_files.get('webm_video'):
                self.entry.media_files['webm_video'].delete()

        else:
            self.transcoder.transcode(self.process_filename, tmp_dst,
                                      vp8_quality=vp8_quality,
                                      vp8_threads=vp8_threads,
                                      vorbis_quality=vorbis_quality,
                                      progress_callback=progress_callback,
                                      dimensions=tuple(medium_size))
            # dst_data is only set when the transcoder produced output.
            if self.transcoder.dst_data:
                video_info = self.transcoder.dst_data.get_video_streams()[0]
                dst_dimensions = (video_info.get_width(),
                                  video_info.get_height())
                self._keep_best()

                # Push transcoded video to public storage
                _log.debug('Saving medium...')
                store_public(self.entry, 'webm_video', tmp_dst,
                             self.name_builder.fill('{basename}.medium.webm'))
                _log.debug('Saved medium')

                self.entry.set_file_metadata('webm_video', **file_metadata)

                self.did_transcode = True
            else:
                dst_dimensions = orig_dst_dimensions

        # Save the width and height of the transcoded video
        self.entry.media_data_init(
            width=dst_dimensions[0],
            height=dst_dimensions[1])

    def generate_thumb(self, thumb_size=None):
        """Capture a thumbnail for the video and push it to public storage.

        NOTE(review): the config fallback builds a 1-tuple (width only),
        while the CLI parser supplies two values — the stored 'thumb_size'
        shapes may therefore never compare equal in _skip_processing when
        switching between the two paths; confirm against callers.
        """
        # Temporary file for the video thumbnail (cleaned up with workbench)
        tmp_thumb = os.path.join(self.workbench.dir,
                                 self.name_builder.fill(
                                     '{basename}.thumbnail.jpg'))

        if not thumb_size:
            thumb_size = (mgg.global_config['media:thumb']['max_width'],)

        if self._skip_processing('thumb', thumb_size=thumb_size):
            return

        # We will only use the width so that the correct scale is kept
        transcoders.capture_thumb(
            self.process_filename,
            tmp_thumb,
            thumb_size[0])

        # Checking if the thumbnail was correctly created.  If it was not,
        # then just give up.
        if not os.path.exists (tmp_thumb):
            return

        # Push the thumbnail to public storage
        _log.debug('Saving thumbnail...')
        store_public(self.entry, 'thumb', tmp_thumb,
                     self.name_builder.fill('{basename}.thumbnail.jpg'))

        self.entry.set_file_metadata('thumb', thumb_size=thumb_size)
333
class InitialProcessor(CommonVideoProcessor):
    """
    First-pass processing pipeline for newly uploaded video.
    """
    name = "initial"
    description = "Initial processing"

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """Eligible when the entry has not been processed successfully yet."""
        if not state:
            state = entry.state
        return state in {"unprocessed", "failed"}

    @classmethod
    def generate_parser(cls):
        """Build the argparse parser for the initial-processing options."""
        parser = argparse.ArgumentParser(
            description=cls.description, prog=cls.name)

        # Both size options share the same two-value integer shape.
        size_options = dict(
            nargs=2, metavar=('max_width', 'max_height'), type=int)
        parser.add_argument('--medium_size', **size_options)
        parser.add_argument('--vp8_quality', type=int,
                            help='Range 0..10')
        parser.add_argument('--vp8_threads', type=int,
                            help='0 means number_of_CPUs - 1')
        parser.add_argument('--vorbis_quality', type=float,
                            help='Range -0.1..1')
        parser.add_argument('--thumb_size', **size_options)

        return parser

    @classmethod
    def args_to_request(cls, args):
        """Convert parsed CLI args into a processing request dict."""
        return request_from_args(
            args, ['medium_size', 'vp8_quality', 'vp8_threads',
                   'vorbis_quality', 'thumb_size'])

    def process(self, medium_size=None, vp8_threads=None, vp8_quality=None,
                vorbis_quality=None, thumb_size=None):
        """Run the full initial pipeline: transcode, keep original,
        thumbnail, then drop the queue file."""
        self.common_setup()

        self.transcode(medium_size=medium_size, vp8_quality=vp8_quality,
                       vp8_threads=vp8_threads, vorbis_quality=vorbis_quality)

        self.copy_original()
        self.generate_thumb(thumb_size=thumb_size)
        self.delete_queue_file()
399
400
class Resizer(CommonVideoProcessor):
    """
    Video thumbnail resizing process steps for processed media
    """
    name = 'resize'
    description = 'Resize thumbnail'
    thumb_size = 'thumb_size'

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """Eligible only for entries that finished processing."""
        if not state:
            state = entry.state
        # Fixed: `state in 'processed'` was a substring test (any substring
        # of 'processed', e.g. 'cess', would match); we want exact equality.
        return state == 'processed'

    @classmethod
    def generate_parser(cls):
        """Build the argparse parser for the resize options."""
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--thumb_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        # Needed for gmg reprocess thumbs to work
        parser.add_argument(
            'file',
            nargs='?',
            default='thumb',
            choices=['thumb'])

        return parser

    @classmethod
    def args_to_request(cls, args):
        """Convert parsed CLI args into a processing request dict."""
        return request_from_args(
            args, ['thumb_size', 'file'])

    def process(self, thumb_size=None, file=None):
        """Regenerate the thumbnail for an already-processed entry."""
        self.common_setup()
        self.generate_thumb(thumb_size=thumb_size)
444
445
class Transcoder(CommonVideoProcessor):
    """
    Transcoding processing steps for processed video
    """
    name = 'transcode'
    description = 'Re-transcode video'

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """Eligible only for entries that finished processing."""
        if not state:
            state = entry.state
        # Fixed: `state in 'processed'` was a substring test (any substring
        # of 'processed', e.g. 'cess', would match); we want exact equality.
        return state == 'processed'

    @classmethod
    def generate_parser(cls):
        """Build the argparse parser for the re-transcode options."""
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--medium_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        parser.add_argument(
            '--vp8_quality',
            type=int,
            help='Range 0..10')

        parser.add_argument(
            '--vp8_threads',
            type=int,
            help='0 means number_of_CPUs - 1')

        parser.add_argument(
            '--vorbis_quality',
            type=float,
            help='Range -0.1..1')

        return parser

    @classmethod
    def args_to_request(cls, args):
        """Convert parsed CLI args into a processing request dict."""
        return request_from_args(
            args, ['medium_size', 'vp8_threads', 'vp8_quality',
                   'vorbis_quality'])

    def process(self, medium_size=None, vp8_quality=None, vp8_threads=None,
                vorbis_quality=None):
        """Re-run the transcode step for an already-processed entry."""
        self.common_setup()
        self.transcode(medium_size=medium_size, vp8_threads=vp8_threads,
                       vp8_quality=vp8_quality, vorbis_quality=vorbis_quality)
499
500
class VideoProcessingManager(ProcessingManager):
    """Processing manager wiring up all video processors."""

    def __init__(self):
        super(VideoProcessingManager, self).__init__()
        # Register every processor this media type supports.
        for processor in (InitialProcessor, Resizer, Transcoder):
            self.add_processor(processor)

    def workflow(self, entry, feed_url, reprocess_action, reprocess_info=None):
        """Queue asynchronous processing of *entry* under its task id."""
        task_args = [entry.id, feed_url, reprocess_action, reprocess_info]
        ProcessMedia().apply_async(
            task_args, {}, task_id=entry.queued_task_id)