Merge remote-tracking branch 'gsoc2016/Subtitle-1'
[mediagoblin.git] / mediagoblin / media_types / video / processing.py
CommitLineData
93bdab9d 1# GNU MediaGoblin -- federated, autonomous media hosting
cf29e8a8 2# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
93bdab9d
JW
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
57d1cb3c 17import argparse
2ed6afb0 18import os.path
e9c1b938 19import logging
d0ceb506 20import datetime
93bdab9d 21
896d00fb
BP
22import six
23
93bdab9d 24from mediagoblin import mg_globals as mgg
347ef583
RE
25from mediagoblin.processing import (
26 FilenameBuilder, BaseProcessingFail,
27 ProgressCallback, MediaProcessor,
28 ProcessingManager, request_from_args,
1cefccc7 29 get_process_filename, store_public,
347ef583 30 copy_original)
51eb0267 31from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
54b4b28f 32from mediagoblin.media_types import MissingComponents
51eb0267 33
26729e02 34from . import transcoders
5c754fda
JW
35from .util import skip_transcode
36
8e5f9746
JW
# Module-level logger; its level is pinned to DEBUG right here.
_log = logging.getLogger(__name__)
_log.setLevel(logging.DEBUG)

# Dotted name of this media type.  It is returned by the sniffer and is
# also the key used to look up this plugin's configuration section.
MEDIA_TYPE = 'mediagoblin.media_types.video'
41
93bdab9d 42
51eb0267
JW
class VideoTranscodingFail(BaseProcessingFail):
    """Processing failure raised when transcoding a video fails."""

    # Translatable, user-facing summary of the failure.
    general_message = _(u'Video transcoding failed')
48
49
54b4b28f
BB
def sniffer(media_file):
    """
    New style sniffer, used in the two-step check.

    ``media_file`` must have a ``.name`` attribute, which is handed to
    ``transcoders.discover``.

    Returns ``MEDIA_TYPE`` when discovery succeeds and at least one video
    stream is present.

    Raises ``MissingComponents`` when discovery raises, when no video
    stream is found, or when the discovery result is non-zero.
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))

    try:
        data = transcoders.discover(media_file.name)
    except Exception as e:
        # this is usually GLib.GError, but we don't really care which one
        message = u'GStreamer: {0}'.format(six.text_type(e))
        _log.warning(message)
        raise MissingComponents(message)
    _log.debug('Discovered: {0}'.format(data))

    if not data.get_video_streams():
        raise MissingComponents('No video streams found in this video')

    # A result of 0 means success; nothing more to check in that case.
    if data.get_result() == 0:
        return MEDIA_TYPE

    try:
        missing = data.get_misc().get_string('name')
        _log.warning('GStreamer: missing {0}'.format(missing))
    except AttributeError:
        # AttributeError happens here on gstreamer >1.4, when get_misc
        # returns None. There is a special function to get info about the
        # missing plugin. The full info is printed to the logs for the
        # admin, and a short and nice version is shown to the user.
        details = data.get_missing_elements_installer_details()
        _log.warning('GStreamer: missing: {0}'.format(', '.join(details)))
        # Each detail is a '|'-separated string; fields from index 3 on
        # feed the "{0} ({1})" summary.
        summaries = [u'{0} ({1})'.format(*detail.split('|')[3:])
                     for detail in details]
        missing = u', '.join(summaries)
    raise MissingComponents(u'{0} is missing'.format(missing))
93bdab9d 80
bfd68cce 81
9a23a816
AB
# File extensions this media type refuses to handle, even though gstreamer
# might claim it can process them.
EXCLUDED_EXTS = ["nef", "svg"]


def sniff_handler(media_file, filename):
    """
    Decide whether this media type should handle an uploaded file.

    ``filename`` is only used for its extension and for log messages;
    ``media_file`` is passed on to ``sniffer``.

    Returns whatever ``sniffer`` returns on success, or ``None`` when the
    extension is listed in ``EXCLUDED_EXTS`` or when sniffing raises.
    """
    clean_ext = os.path.splitext(filename)[1].lower()[1:]

    if clean_ext in EXCLUDED_EXTS:
        # We don't handle this filetype, though gstreamer might think we can
        _log.info(
            'Refused to process {0} due to excluded extension'.format(filename))
        return None

    try:
        return sniffer(media_file)
    except Exception:
        # Deliberately best-effort: any sniffing error means "not ours".
        # ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        _log.error('Could not discover {0}'.format(filename))
        return None
98
2d1e8905
BB
def get_tags(stream_info):
    """
    Get all tags and their values from stream info.

    Only the first value (index 0) of each tag is read.  A ``date`` tag is
    turned into a ``"year-month-day"`` string and a ``datetime`` tag into
    an ISO format string (``None`` if that conversion fails).  Values whose
    type is not accepted by json are dropped.

    Returns a plain dict; it is empty when ``stream_info`` has no tags.
    """
    taglist = stream_info.get_tags()
    if not taglist:
        return {}

    tags = {}

    def _collect(tag_list, tag):
        # Callback for ``foreach``: remember the first value of each tag.
        tags[tag] = tag_list.get_value_index(tag, 0)

    taglist.foreach(_collect)

    # date/datetime should be converted from GDate/GDateTime to strings
    if 'date' in tags:
        date = tags['date']
        tags['date'] = "%s-%s-%s" % (date.year, date.month, date.day)

    if 'datetime' in tags:
        # TODO: handle timezone info; gst.get_time_zone_offset +
        # python's tzinfo should help
        dt = tags['datetime']
        try:
            tags['datetime'] = datetime.datetime(
                dt.get_year(), dt.get_month(), dt.get_day(), dt.get_hour(),
                dt.get_minute(), dt.get_second(),
                dt.get_microsecond()).isoformat()
        except Exception:
            # Best effort: an unconvertible value is recorded as None.
            # ``Exception`` keeps KeyboardInterrupt/SystemExit propagating.
            tags['datetime'] = None

    # types below are accepted by json; others must not be present
    return {
        key: value for key, value in tags.items()
        if isinstance(value, (dict, list, six.string_types, int, float,
                              bool, type(None)))}
132
29adab46
CAW
def store_metadata(media_entry, metadata):
    """
    Store metadata from this video for this media entry.

    Builds a dict with optional ``audio`` and ``video`` lists (one item per
    stream, present only when such streams exist) plus a ``common`` section
    holding the duration and the tags of ``metadata`` itself, then passes
    it to ``media_entry.media_data_init`` as ``orig_metadata``.
    """
    stored_metadata = {}

    audio_streams = metadata.get_audio_streams()
    if audio_streams:
        stored_metadata['audio'] = [
            {
                'channels': stream.get_channels(),
                'bitrate': stream.get_bitrate(),
                'depth': stream.get_depth(),
                # NOTE: the spelling of this key is what gets stored;
                # it is kept unchanged on purpose.
                'languange': stream.get_language(),
                'sample_rate': stream.get_sample_rate(),
                'tags': get_tags(stream),
            }
            for stream in audio_streams]

    video_streams = metadata.get_video_streams()
    if video_streams:
        stored_metadata['video'] = [
            {
                'width': stream.get_width(),
                'height': stream.get_height(),
                'bitrate': stream.get_bitrate(),
                'depth': stream.get_depth(),
                # Frame rate kept as [numerator, denominator].
                'videorate': [stream.get_framerate_num(),
                              stream.get_framerate_denom()],
                'tags': get_tags(stream),
            }
            for stream in video_streams]

    common = {
        'duration': metadata.get_duration(),
        'tags': get_tags(metadata),
    }
    stored_metadata['common'] = common

    # Only save this field if there's something to save
    if stored_metadata:
        media_entry.media_data_init(orig_metadata=stored_metadata)
347ef583
RE
174
175
class CommonVideoProcessor(MediaProcessor):
    """
    Provides a base for various video processing steps
    """
    # Media file keys that may serve as the input of a processing run.
    acceptable_files = ['original', 'best_quality', 'webm_video']

    def common_setup(self):
        """
        Prepare the state shared by all processing steps: the plugin
        configuration, the local file to process, a filename builder for
        it, a transcoder instance and the ``did_transcode`` flag.
        """
        self.video_config = mgg.global_config['plugins'][MEDIA_TYPE]

        # Pull down and set up the processing file
        self.process_filename = get_process_filename(
            self.entry, self.workbench, self.acceptable_files)
        self.name_builder = FilenameBuilder(self.process_filename)

        self.transcoder = transcoders.VideoTranscoder()
        self.did_transcode = False

    def copy_original(self):
        """
        Keep the original file when nothing was transcoded, or when the
        ``keep_original`` option is set.
        """
        # If we didn't transcode, then we need to keep the original
        if not self.did_transcode or self.video_config['keep_original']:
            copy_original(
                self.entry, self.process_filename,
                self.name_builder.fill('{basename}{ext}'))

    def _keep_best(self):
        """
        If there is no original, keep the best file that we have
        """
        media_files = self.entry.media_files
        if media_files.get('best_quality'):
            return

        # Save the best quality file if no original?
        if not media_files.get('original') and media_files.get('webm_video'):
            media_files['best_quality'] = media_files['webm_video']

    def _skip_processing(self, keyname, **kwargs):
        """
        Tell whether the file stored under ``keyname`` was already produced
        with the settings given in ``kwargs``.

        Returns ``False`` when no file metadata is recorded for
        ``keyname``; otherwise ``True`` unless one of the settings relevant
        to ``keyname`` differs from the recorded value.
        """
        file_metadata = self.entry.get_file_metadata(keyname)

        if not file_metadata:
            return False

        if keyname == 'webm_video':
            compared = ('medium_size', 'vp8_quality', 'vp8_threads',
                        'vorbis_quality')
        elif keyname == 'thumb':
            compared = ('thumb_size',)
        else:
            # No settings are compared for any other key.
            compared = ()

        return not any(kwargs.get(key) != file_metadata.get(key)
                       for key in compared)

    def transcode(self, medium_size=None, vp8_quality=None, vp8_threads=None,
                  vorbis_quality=None):
        """
        Transcode the processing file to a medium-sized WebM, unless the
        same settings were already used or the source can be used as is.

        Arguments left as ``None`` fall back to the configuration
        (``medium_size`` also falls back when empty).  An explicit ``0``
        for ``vp8_quality``, ``vp8_threads`` or ``vorbis_quality`` is
        honoured, since the command line help documents ``0`` as a valid
        value for them.

        Stores the discovered metadata and the resulting width and height
        on the entry.
        """
        progress_callback = ProgressCallback(self.entry)
        medium_name = self.name_builder.fill('{basename}.medium.webm')
        tmp_dst = os.path.join(self.workbench.dir, medium_name)

        if not medium_size:
            medium_size = (
                mgg.global_config['media:medium']['max_width'],
                mgg.global_config['media:medium']['max_height'])
        # ``is None`` rather than truthiness: 0 is a meaningful setting
        # here and must not be silently replaced by the configured value.
        if vp8_quality is None:
            vp8_quality = self.video_config['vp8_quality']
        if vp8_threads is None:
            vp8_threads = self.video_config['vp8_threads']
        if vorbis_quality is None:
            vorbis_quality = self.video_config['vorbis_quality']

        file_metadata = {'medium_size': medium_size,
                         'vp8_threads': vp8_threads,
                         'vp8_quality': vp8_quality,
                         'vorbis_quality': vorbis_quality}

        if self._skip_processing('webm_video', **file_metadata):
            return

        # Extract metadata and keep a record of it
        metadata = transcoders.discover(self.process_filename)

        # metadata's stream info here is a DiscovererContainerInfo instance,
        # it gets split into DiscovererAudioInfo and DiscovererVideoInfo;
        # metadata itself has container-related data in tags, like video-codec
        store_metadata(self.entry, metadata)

        orig_dst_dimensions = (metadata.get_video_streams()[0].get_width(),
                               metadata.get_video_streams()[0].get_height())

        # Figure out whether or not we need to transcode this video or
        # if we can skip it
        if skip_transcode(metadata, medium_size):
            _log.debug('Skipping transcoding')

            dst_dimensions = orig_dst_dimensions

            # If there is an original and transcoded, delete the transcoded
            # since it must be of lower quality than the original
            if self.entry.media_files.get('original') and \
                    self.entry.media_files.get('webm_video'):
                self.entry.media_files['webm_video'].delete()

        else:
            self.transcoder.transcode(self.process_filename, tmp_dst,
                                      vp8_quality=vp8_quality,
                                      vp8_threads=vp8_threads,
                                      vorbis_quality=vorbis_quality,
                                      progress_callback=progress_callback,
                                      dimensions=tuple(medium_size))
            if self.transcoder.dst_data:
                video_info = self.transcoder.dst_data.get_video_streams()[0]
                dst_dimensions = (video_info.get_width(),
                                  video_info.get_height())
                self._keep_best()

                # Push transcoded video to public storage
                _log.debug('Saving medium...')
                store_public(self.entry, 'webm_video', tmp_dst,
                             self.name_builder.fill('{basename}.medium.webm'))
                _log.debug('Saved medium')

                self.entry.set_file_metadata('webm_video', **file_metadata)

                self.did_transcode = True
            else:
                # No destination data after transcoding: fall back to the
                # dimensions of the source.
                dst_dimensions = orig_dst_dimensions

        # Save the width and height of the transcoded video
        self.entry.media_data_init(
            width=dst_dimensions[0],
            height=dst_dimensions[1])

    def generate_thumb(self, thumb_size=None):
        """
        Capture a thumbnail from the processing file and store it publicly
        under the ``thumb`` key, unless one with the same ``thumb_size``
        already exists.  ``thumb_size`` defaults to a 1-tuple holding the
        configured maximum thumbnail width.
        """
        # Temporary file for the video thumbnail (cleaned up with workbench)
        tmp_thumb = os.path.join(
            self.workbench.dir,
            self.name_builder.fill('{basename}.thumbnail.jpg'))

        if not thumb_size:
            thumb_size = (mgg.global_config['media:thumb']['max_width'],)

        if self._skip_processing('thumb', thumb_size=thumb_size):
            return

        # We will only use the width so that the correct scale is kept
        transcoders.capture_thumb(
            self.process_filename,
            tmp_thumb,
            thumb_size[0])

        # Checking if the thumbnail was correctly created. If it was not,
        # then just give up.
        if not os.path.exists(tmp_thumb):
            return

        # Push the thumbnail to public storage
        _log.debug('Saving thumbnail...')
        store_public(self.entry, 'thumb', tmp_thumb,
                     self.name_builder.fill('{basename}.thumbnail.jpg'))

        self.entry.set_file_metadata('thumb', thumb_size=thumb_size)
347ef583
RE
345
class InitialProcessor(CommonVideoProcessor):
    """
    Initial processing steps for new video
    """
    name = "initial"
    description = "Initial processing"

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """
        Return whether ``state`` (or ``entry.state`` when no state is
        given) is "unprocessed" or "failed".
        """
        current = state if state else entry.state
        return current in ("unprocessed", "failed")

    @classmethod
    def generate_parser(cls):
        """Build the argparse parser for this processor's options."""
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        size_metavar = ('max_width', 'max_height')

        parser.add_argument(
            '--medium_size', nargs=2, metavar=size_metavar, type=int)
        parser.add_argument(
            '--vp8_quality', type=int, help='Range 0..10')
        parser.add_argument(
            '--vp8_threads', type=int, help='0 means number_of_CPUs - 1')
        parser.add_argument(
            '--vorbis_quality', type=float, help='Range -0.1..1')
        parser.add_argument(
            '--thumb_size', nargs=2, metavar=size_metavar, type=int)

        return parser

    @classmethod
    def args_to_request(cls, args):
        """Turn parsed arguments into a request via ``request_from_args``."""
        wanted = ['medium_size', 'vp8_quality', 'vp8_threads',
                  'vorbis_quality', 'thumb_size']
        return request_from_args(args, wanted)

    def process(self, medium_size=None, vp8_threads=None, vp8_quality=None,
                vorbis_quality=None, thumb_size=None):
        """
        Run the full initial pipeline: set up, transcode, keep the
        original if needed, generate the thumbnail, then delete the queue
        file.
        """
        self.common_setup()

        transcode_options = {
            'medium_size': medium_size,
            'vp8_quality': vp8_quality,
            'vp8_threads': vp8_threads,
            'vorbis_quality': vorbis_quality,
        }
        self.transcode(**transcode_options)

        self.copy_original()
        self.generate_thumb(thumb_size=thumb_size)
        self.delete_queue_file()
411
412
371bcc24
RE
class Resizer(CommonVideoProcessor):
    """
    Video thumbnail resizing process steps for processed media
    """
    name = 'resize'
    description = 'Resize thumbnail'
    # NOTE(review): not read anywhere in this class; presumably consumed by
    # code outside this file -- confirm before removing.
    thumb_size = 'thumb_size'

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """
        Return whether ``state`` (or ``entry.state`` when no state is
        given) is exactly "processed".
        """
        if not state:
            state = entry.state
        # Equality, not ``in 'processed'``: the latter is a substring test
        # and would also accept values such as 'process' or 'ed'.
        return state == 'processed'

    @classmethod
    def generate_parser(cls):
        """Build the argparse parser for this processor's options."""
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--thumb_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        # Needed for gmg reprocess thumbs to work
        parser.add_argument(
            'file',
            nargs='?',
            default='thumb',
            choices=['thumb'])

        return parser

    @classmethod
    def args_to_request(cls, args):
        """Turn parsed arguments into a request via ``request_from_args``."""
        return request_from_args(
            args, ['thumb_size', 'file'])

    def process(self, thumb_size=None, file=None):
        """
        Regenerate the thumbnail.  ``file`` is accepted because the parser
        defines it, but it is not used here.
        """
        self.common_setup()
        self.generate_thumb(thumb_size=thumb_size)
456
457
57d1cb3c
RE
class Transcoder(CommonVideoProcessor):
    """
    Transcoding processing steps for processed video
    """
    name = 'transcode'
    description = 'Re-transcode video'

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """
        Return whether ``state`` (or ``entry.state`` when no state is
        given) is exactly "processed".
        """
        if not state:
            state = entry.state
        # Equality, not ``in 'processed'``: the latter is a substring test
        # and would also accept values such as 'process' or 'ed'.
        return state == 'processed'

    @classmethod
    def generate_parser(cls):
        """Build the argparse parser for this processor's options."""
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--medium_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        parser.add_argument(
            '--vp8_quality',
            type=int,
            help='Range 0..10')

        parser.add_argument(
            '--vp8_threads',
            type=int,
            help='0 means number_of_CPUs - 1')

        parser.add_argument(
            '--vorbis_quality',
            type=float,
            help='Range -0.1..1')

        return parser

    @classmethod
    def args_to_request(cls, args):
        """Turn parsed arguments into a request via ``request_from_args``."""
        return request_from_args(
            args, ['medium_size', 'vp8_threads', 'vp8_quality',
                   'vorbis_quality'])

    def process(self, medium_size=None, vp8_quality=None, vp8_threads=None,
                vorbis_quality=None):
        """Set up and re-run the transcoding step with the given options."""
        self.common_setup()
        self.transcode(medium_size=medium_size, vp8_threads=vp8_threads,
                       vp8_quality=vp8_quality, vorbis_quality=vorbis_quality)
511
512
347ef583
RE
class VideoProcessingManager(ProcessingManager):
    """Processing manager that registers the video processors."""

    def __init__(self):
        super(VideoProcessingManager, self).__init__()
        # Registration order: initial processing, thumbnail resize,
        # re-transcode.
        for processor in (InitialProcessor, Resizer, Transcoder):
            self.add_processor(processor)