Rewrite thumbnailer
authorBoris Bobrov <breton@cynicmansion.ru>
Thu, 29 May 2014 10:50:32 +0000 (14:50 +0400)
committerBoris Bobrov <breton@cynicmansion.ru>
Sun, 15 Feb 2015 02:52:22 +0000 (05:52 +0300)
Previous thumbnailer didn't always work properly. It was also not ready
to be ported to GStreamer 1.0

The rewrite makes it shorter, more pythonic and prepares it for porting.

 - no longer uses playbin2;
 - is tested
 - logs some events
 - previous thumbnailer is removed

mediagoblin/media_types/video/processing.py
mediagoblin/media_types/video/transcoders.py
mediagoblin/tests/test_video.py [new file with mode: 0644]

index bbed4f129e3ddfd18b5702d540cfd8d40380b632..ca9a6ad91b7b196537f735bcfd3f1f72521a4235 100644 (file)
@@ -74,6 +74,13 @@ def store_metadata(media_entry, metadata):
     """
     Store metadata from this video for this media entry.
     """
+    stored_metadata = dict()
+    audio_info_list = metadata.get_audio_streams()
+    if audio_info_list:
+        audio_info = audio_info_list[0]
+        stored_metadata['audiochannels'] = audio_info.get_channels()
+    # video is always there
+    video_info = metadata.get_video_streams()[0]
     # Let's pull out the easy, not having to be converted ones first
     stored_metadata = dict(
         [(key, metadata[key])
@@ -270,7 +277,7 @@ class CommonVideoProcessor(MediaProcessor):
             return
 
         # We will only use the width so that the correct scale is kept
-        transcoders.VideoThumbnailerMarkII(
+        transcoders.capture_thumb(
             self.process_filename,
             tmp_thumb,
             thumb_size[0])
index 3a3fa97f6ecaaa6efd2a41a4654169f58e11ca06..e08b897c692d0180b01c09cb39d1f9e7119aa300 100644 (file)
@@ -55,336 +55,73 @@ os.putenv('GST_DEBUG_DUMP_DOT_DIR', '/tmp')
 
 def pixbuf_to_pilbuf(buf):
     data = list()
-    for i in range(0, len(buf), 3):
-        r, g, b = struct.unpack('BBB', buf[i:i + 3])
+    for i in range(0, len(buf)-4, 4):
+        r, g, b, x = struct.unpack('BBBB', buf[i:i + 4])
+        # XXX: can something be done with the 'X' part of RGBX?
         data.append((r, g, b))
-
     return data
 
-
-class VideoThumbnailerMarkII(object):
-    '''
-    Creates a thumbnail from a video file. Rewrite of VideoThumbnailer.
-
-    Large parts of the functionality and overall architectue contained within
-    this object is taken from Participatory Culture Foundation's
-    `gst_extractor.Extractor` object last seen at
-    https://github.com/pculture/miro/blob/master/tv/lib/frontends/widgets/gst/gst_extractor.py
-    in the `miro` codebase.
-
-    The `miro` codebase and the gst_extractor.py are licensed under the GNU
-    General Public License v2 or later.
-    '''
-    STATE_NULL = 0
-    STATE_HALTING = 1
-    STATE_PROCESSING = 2
-    STATE_PROCESSING_THUMBNAIL = 3
-
-    def __init__(self, source_path, dest_path, width=None, height=None,
-            position_callback=None):
-        self.state = self.STATE_NULL
-
-        self.has_reached_playbin_pause = False
-
-        self.thumbnail_pipeline = None
-
-        self.permission_to_take_picture = False
-
-        self.buffer_probes = {}
-
-        self.errors = []
-
-        self.source_path = os.path.abspath(source_path)
-        self.dest_path = os.path.abspath(dest_path)
-
-        self.width = width
-        self.height = height
-        self.position_callback = position_callback \
-                or self.wadsworth_position_callback
-
-        self.mainloop = gobject.MainLoop()
-
-        self.playbin = gst.element_factory_make('playbin')
-
-        self.videosink = gst.element_factory_make('fakesink', 'videosink')
-        self.audiosink = gst.element_factory_make('fakesink', 'audiosink')
-
-        self.playbin.set_property('video-sink', self.videosink)
-        self.playbin.set_property('audio-sink', self.audiosink)
-
-        self.playbin_message_bus = self.playbin.get_bus()
-
-        self.playbin_message_bus.add_signal_watch()
-        self.playbin_bus_watch_id = self.playbin_message_bus.connect(
-                'message',
-                self.on_playbin_message)
-
-        self.playbin.set_property(
-                'uri',
-                'file:{0}'.format(
-                    urllib.pathname2url(self.source_path)))
-
-        self.playbin.set_state(gst.STATE_PAUSED)
-
-        try:
-            self.run()
-        except Exception as exc:
-            _log.critical(
-                'Exception "{0}" caught, shutting down mainloop and re-raising'\
-                    .format(exc))
-            self.disconnect()
-            raise
-
-    def wadsworth_position_callback(self, duration, gst):
-        return self.duration / 100 * 30
-
-    def run(self):
-        self.mainloop.run()
-
-    def on_playbin_message(self, message_bus, message):
-        # Silenced to prevent clobbering of output
-        #_log.debug('playbin message: {0}'.format(message))
-
-        if message.type == gst.MESSAGE_ERROR:
-            _log.error('playbin error: {0}'.format(message))
-            gobject.idle_add(self.on_playbin_error)
-
-        if message.type == gst.MESSAGE_STATE_CHANGED:
-            prev_state, cur_state, pending_state = \
-                    message.parse_state_changed()
-
-            _log.debug('playbin state changed: \nprev: {0}\ncur: {1}\n \
-pending: {2}'.format(
-    prev_state,
-    cur_state,
-    pending_state))
-
-            if cur_state == gst.STATE_PAUSED:
-                if message.src == self.playbin:
-                    _log.info('playbin ready')
-                    gobject.idle_add(self.on_playbin_paused)
-
-    def on_playbin_paused(self):
-        if self.has_reached_playbin_pause:
-            _log.warn('Has already reached on_playbin_paused. Aborting \
-without doing anything this time.')
-            return False
-
-        self.has_reached_playbin_pause = True
-
-        # XXX: Why is this even needed at this point?
-        current_video = self.playbin.get_property('current-video')
-
-        if not current_video:
-            _log.critical('Could not get any video data \
-from playbin')
-        else:
-            _log.info('Got video data from playbin')
-
-        self.duration = self.get_duration(self.playbin)
-        self.permission_to_take_picture = True
-        self.buffer_probes = {}
-
-        pipeline = ''.join([
-            'filesrc location="%s" ! decodebin2 ! ' % self.source_path,
-            'ffmpegcolorspace ! videoscale ! ',
-            'video/x-raw-rgb,depth=24,bpp=24,pixel-aspect-ratio=1/1',
-            ',width={0}'.format(self.width) if self.width else '',
-            ',height={0}'.format(self.height) if self.height else '',
-            ' ! ',
-            'fakesink signal-handoffs=True'])
-
-        _log.debug('thumbnail_pipeline: {0}'.format(pipeline))
-
-        self.thumbnail_pipeline = gst.parse_launch(pipeline)
-        self.thumbnail_message_bus = self.thumbnail_pipeline.get_bus()
-        self.thumbnail_message_bus.add_signal_watch()
-        self.thumbnail_bus_watch_id = self.thumbnail_message_bus.connect(
-                'message',
-                self.on_thumbnail_message)
-
-        self.thumbnail_pipeline.set_state(gst.STATE_PAUSED)
-
-        gobject.timeout_add(3000, self.on_gobject_timeout)
-
-        return False
-
-    def on_thumbnail_message(self, message_bus, message):
-        # This is silenced to prevent clobbering of the terminal window
-        #_log.debug('thumbnail message: {0}'.format(message))
-
-        if message.type == gst.MESSAGE_ERROR:
-            _log.error('thumbnail error: {0}'.format(message.parse_error()))
-            gobject.idle_add(self.on_thumbnail_error, message)
-
-        if message.type == gst.MESSAGE_STATE_CHANGED:
-            prev_state, cur_state, pending_state = \
-                    message.parse_state_changed()
-
-            _log.debug('thumbnail state changed: \nprev: {0}\ncur: {1}\n \
-pending: {2}'.format(
-    prev_state,
-    cur_state,
-    pending_state))
-
-            if cur_state == gst.STATE_PAUSED and \
-               not self.state == self.STATE_PROCESSING_THUMBNAIL:
-                # Find the fakesink sink pad and attach the on_buffer_probe
-                # handler to it.
-                seek_amount = self.position_callback(self.duration, gst)
-
-                seek_result = self.thumbnail_pipeline.seek(
-                        1.0,
-                        gst.FORMAT_TIME,
-                        gst.SEEK_FLAG_FLUSH | gst.SEEK_FLAG_ACCURATE,
-                        gst.SEEK_TYPE_SET,
-                        seek_amount,
-                        gst.SEEK_TYPE_NONE,
-                        0)
-
-                if not seek_result:
-                    _log.info('Could not seek.')
-                else:
-                    _log.info('Seek successful, attaching buffer probe')
-                    self.state = self.STATE_PROCESSING_THUMBNAIL
-                    for sink in self.thumbnail_pipeline.sinks():
-                        sink_name = sink.get_name()
-                        sink_factory_name = sink.get_factory().get_name()
-
-                        if sink_factory_name == 'fakesink':
-                            sink_pad = sink.get_pad('sink')
-
-                            self.buffer_probes[sink_name] = sink_pad\
-                                    .add_buffer_probe(
-                                            self.on_pad_buffer_probe,
-                                            sink_name)
-
-                            _log.info('Attached buffer probes: {0}'.format(
-                                self.buffer_probes))
-
-                            break
-
-
-            elif self.state == self.STATE_PROCESSING_THUMBNAIL:
-                _log.info('Already processing thumbnail')
-
-    def on_pad_buffer_probe(self, *args):
-        _log.debug('buffer probe handler: {0}'.format(args))
-        gobject.idle_add(lambda: self.take_snapshot(*args))
-
-    def take_snapshot(self, pad, buff, name):
-        if self.state == self.STATE_HALTING:
-            _log.debug('Pipeline is halting, will not take snapshot')
-            return False
-
-        _log.info('Taking snapshot! ({0})'.format(
-            (pad, buff, name)))
-        try:
-            caps = buff.caps
-            if caps is None:
-                _log.error('No buffer caps present /take_snapshot')
-                self.disconnect()
-
-            _log.debug('caps: {0}'.format(caps))
-
-            filters = caps[0]
-            width = filters['width']
-            height = filters['height']
-
-            im = Image.new('RGB', (width, height))
-
-            data = pixbuf_to_pilbuf(buff.data)
-
-            im.putdata(data)
-
-            im.save(self.dest_path)
-
-            _log.info('Saved snapshot!')
-
-            self.disconnect()
-
-        except gst.QueryError as exc:
-            _log.error('take_snapshot - QueryError: {0}'.format(exc))
-
-        return False
-
-    def on_thumbnail_error(self, message):
-        scaling_failed = False
-
-        if 'Error calculating the output scaled size - integer overflow' \
-           in message.parse_error()[1]:
-            # GStreamer videoscale sometimes fails to calculate the dimensions
-            # given only one of the destination dimensions and the source
-            # dimensions. This is a workaround in case videoscale returns an
-            # error that indicates this has happened.
-            scaling_failed = True
-            _log.error('Thumbnailing failed because of videoscale integer'
-                       ' overflow. Will retry with fallback.')
-        else:
-            _log.error('Thumbnailing failed: {0}'.format(message.parse_error()))
-
-        # Kill the current mainloop
-        self.disconnect()
-
-        if scaling_failed:
-            # Manually scale the destination dimensions
-            _log.info('Retrying with manually set sizes...')
-
-            info = VideoTranscoder().discover(self.source_path)
-
-            h = info['videoheight']
-            w = info['videowidth']
-            ratio = 180 / int(w)
-            h = int(h * ratio)
-
-            self.__init__(self.source_path, self.dest_path, 180, h)
-
-    def disconnect(self):
-        self.state = self.STATE_HALTING
-
-        if self.playbin is not None:
-            self.playbin.set_state(gst.STATE_NULL)
-
-            for sink in self.playbin.sinks():
-                sink_name = sink.get_name()
-                sink_factory_name = sink.get_factory().get_name()
-
-                if sink_factory_name == 'fakesink':
-                    sink_pad = sink.get_pad('sink')
-                    sink_pad.remove_buffer_probe(self.buffer_probes[sink_name])
-                    del self.buffer_probes[sink_name]
-
-            self.playbin = None
-
-        if self.thumbnail_pipeline is not None:
-            self.thumbnail_pipeline.set_state(gst.STATE_NULL)
-            self.thumbnail_pipeline = None
-
-        if self.playbin_message_bus is not None:
-            self.playbin_message_bus.disconnect(self.playbin_bus_watch_id)
-            self.playbin_message_bus = None
-
-        self.halt()
-
-    def halt(self):
-        gobject.idle_add(self.mainloop.quit)
-
-    def on_gobject_timeout(self):
-        _log.critical('Reached gobject timeout')
-        self.disconnect()
-
-    def get_duration(self, pipeline, attempt=1):
-        if attempt == 5:
-            _log.critical('Pipeline duration query retry limit reached.')
-            return 0
-
-        try:
-            return pipeline.query_duration(gst.FORMAT_TIME)[0]
-        except gst.QueryError as exc:
-            _log.error('Could not get duration on attempt {0}: {1}'.format(
-                attempt,
-                exc))
-            return self.get_duration(pipeline, attempt + 1)
+def capture_thumb(video_path, dest_path, width=None, height=None, percent=0.5):
+    def pad_added(element, pad, connect_to):
+        caps = pad.get_caps()
+        name = caps[0].get_name()
+        _log.debug('on_pad_added: {0}'.format(name))
+        if name.startswith('video') and not connect_to.is_linked():
+            pad.link(connect_to)
+    # construct pipeline: uridecodebin ! ffmpegcolorspace ! videoscale ! \
+    # ! CAPS ! appsink
+    pipeline = gst.Pipeline()
+    uridecodebin = gst.element_factory_make('uridecodebin')
+    uridecodebin.set_property('uri', 'file://{0}'.format(video_path))
+    ffmpegcolorspace = gst.element_factory_make('ffmpegcolorspace')
+    uridecodebin.connect('pad-added', pad_added,
+                         ffmpegcolorspace.get_pad('sink'))
+    videoscale = gst.element_factory_make('videoscale')
+    filter = gst.element_factory_make('capsfilter', 'filter')
+    # create caps for video scaling
+    caps_struct = gst.Structure('video/x-raw-rgb')
+    caps_struct.set_value('pixel-aspect-ratio', gst.Fraction(1, 1))
+    if height:
+        caps_struct.set_value('height', height)
+    if width:
+        caps_struct.set_value('width', width)
+    caps = gst.Caps(caps_struct)
+    filter.set_property('caps', caps)
+    appsink = gst.element_factory_make('appsink')
+    pipeline.add(uridecodebin, ffmpegcolorspace, videoscale, filter, appsink)
+    gst.element_link_many(ffmpegcolorspace, videoscale, filter, appsink)
+    # pipeline constructed, starting playing, but first some preparations
+    if pipeline.set_state(gst.STATE_PAUSED) == gst.STATE_CHANGE_FAILURE:
+        _log.warning('state change failed')
+    pipeline.get_state()
+    duration = pipeline.query_duration(gst.FORMAT_TIME, None)[0]
+    if duration == gst.CLOCK_TIME_NONE:
+        _log.warning('query_duration failed')
+        duration = 0  # XXX
+    seek_to = int(duration * int(percent * 100) / 100)
+    _log.debug('Seeking to {0} of {1}'.format(
+            seek_to / gst.SECOND, duration / gst.SECOND))
+    seek = pipeline.seek_simple(gst.FORMAT_TIME, gst.SEEK_FLAG_FLUSH, seek_to)
+    if not seek:
+        _log.warning('seek failed')
+    # get sample, retrieve it's format and save
+    sample = appsink.emit("pull-preroll")
+    if not sample:
+        _log.warning('could not get sample')
+        return
+    caps = sample.get_caps()
+    if not caps:
+        _log.warning('could not get snapshot format')
+    structure = caps.get_structure(0)
+    (success, width) = structure.get_int('width')
+    (success, height) = structure.get_int('height')
+    buffer = sample.get_buffer()
+    im = Image.frombytes('RGB', (width, height),
+                         buffer.extract_dup(0, buffer.get_size()))
+    im.save(dest_path)
+    _log.info('thumbnail saved to {0}'.format(dest_path))
+    # cleanup
+    pipeline.set_state(gst.STATE_NULL)
 
 
 class VideoTranscoder(object):
@@ -451,7 +188,6 @@ class VideoTranscoder(object):
         self.discoverer.discover()
 
         self.loop.run()
-
         if hasattr(self, '_discovered_data'):
             return self._discovered_data.__dict__
         else:
@@ -729,7 +465,6 @@ class VideoTranscoder(object):
 
 if __name__ == '__main__':
     os.nice(19)
-    logging.basicConfig()
     from optparse import OptionParser
 
     parser = OptionParser(
diff --git a/mediagoblin/tests/test_video.py b/mediagoblin/tests/test_video.py
new file mode 100644 (file)
index 0000000..0fe58f6
--- /dev/null
@@ -0,0 +1,71 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2013 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import tempfile
+import shutil
+import os
+import pytest
+from contextlib import contextmanager
+import logging
+import imghdr
+
+#TODO: this should be skipped if video plugin is not enabled
+import pygst
+pygst.require('0.10')
+import gst
+
+from mediagoblin.media_types.video.transcoders import capture_thumb
+
+@contextmanager
+def create_data(suffix):
+    video = tempfile.NamedTemporaryFile()
+    src = gst.element_factory_make('videotestsrc')
+    src.set_property('num-buffers', 50)
+    enc = gst.element_factory_make('theoraenc')
+    mux = gst.element_factory_make('oggmux')
+    dst = gst.element_factory_make('filesink')
+    dst.set_property('location', video.name)
+    pipeline = gst.Pipeline()
+    pipeline.add(src, enc, mux, dst)
+    gst.element_link_many(src, enc, mux, dst)
+    pipeline.set_state(gst.STATE_PLAYING)
+    # wait for finish
+    bus = pipeline.get_bus()
+    message = bus.timed_pop_filtered(gst.CLOCK_TIME_NONE,
+                                     gst.MESSAGE_ERROR | gst.MESSAGE_EOS)
+    thumb = tempfile.NamedTemporaryFile(suffix=suffix)
+    pipeline.set_state(gst.STATE_NULL)
+    yield (video.name, thumb.name)
+
+
+#TODO: this should be skipped if video plugin is not enabled
+def test_thumbnails():
+    '''
+    Test thumbnails generation.
+    1. Create a video from gst's videotestsrc
+    3. Capture thumbnail
+    4. Remove it
+    '''
+    #data  create_data() as (video_name, thumbnail_name):
+    test_formats = [('.png', 'png'), ('.jpg', 'jpeg'), ('.gif', 'gif')]
+    for suffix, format in test_formats:
+        with create_data(suffix) as (video_name, thumbnail_name):
+            capture_thumb(video_name, thumbnail_name, width=40)
+            # check if png
+            assert imghdr.what(thumbnail_name) == format
+            # TODO: check height and width
+            # FIXME: it doesn't work with small width, say, 10px. This should be
+            # fixed somehow