Audio thumbnailing & spectrograms, media plugins use sniffing
author    Joar Wandborg <git@wandborg.com>
          Tue, 28 Feb 2012 20:59:38 +0000 (21:59 +0100)
committer Joar Wandborg <git@wandborg.com>
          Tue, 28 Feb 2012 20:59:38 +0000 (21:59 +0100)
* Added extlib/freesound/audioprocessing.py
* config_spec
  * Added create_spectrogram setting
  * Added media:medium and media:thumb max_{width,height} settings
* Added sniffing logic to
  - audio.processing:sniff_handler
  - video.processing:sniff_handler
* Changed audio.processing:sniff_handler logic
* Added audio thumbnailing functionality to audio.processing
  (works only with create_spectrogram enabled)
* Refactored contexts in audio.processing
* Added audio.transcoders:AudioThumbnailer
  Used for creating spectrograms and spectrogram thumbnails -
  Wadsworth's Constant, we meet again :)
* audio.transcoders:AudioTranscoder
  - Added mux_string kwarg
  - Delete self.pipeline on self.halt()
* Changed str.format formatting in image.processing:sniff_handler
  Had {1} without an {0}, changed to {0}
* Refactored VideoTranscoder to use transcode() for transcoding instead
  of __init__()
* Added discover() method to video.transcoders:VideoTranscoder
* Added spectrogram display to media_displays/audio.html
* Updated test_submission to reflect changes in media plugin delegation

extlib/freesound/audioprocessing.py [new file with mode: 0644]
mediagoblin/config_spec.ini
mediagoblin/media_types/ascii/processing.py
mediagoblin/media_types/audio/audioprocessing.py [new symlink]
mediagoblin/media_types/audio/processing.py
mediagoblin/media_types/audio/transcoders.py
mediagoblin/media_types/image/processing.py
mediagoblin/media_types/video/processing.py
mediagoblin/media_types/video/transcoders.py
mediagoblin/templates/mediagoblin/media_displays/audio.html
mediagoblin/tests/test_submission.py

diff --git a/extlib/freesound/audioprocessing.py b/extlib/freesound/audioprocessing.py
new file mode 100644 (file)
index 0000000..2c2b35b
--- /dev/null
@@ -0,0 +1,616 @@
+#!/usr/bin/env python
+# processing.py -- various audio processing functions
+# Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG)
+#                    UNIVERSITAT POMPEU FABRA
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+#   Bram de Jong <bram.dejong at domain.com where domain in gmail>
+#   2012, Joar Wandborg <first name at last name dot se>
+
+from PIL import Image, ImageDraw, ImageColor #@UnresolvedImport
+from functools import partial
+import math
+import numpy
+import os
+import re
+import signal
+
+
+def get_sound_type(input_filename):
+    sound_type = os.path.splitext(input_filename.lower())[1].strip(".")
+
+    if sound_type == "fla":
+        sound_type = "flac"
+    elif sound_type == "aif":
+        sound_type = "aiff"
+
+    return sound_type
+
+
+try:
+    import scikits.audiolab as audiolab
+except ImportError:
+    print "WARNING: audiolab is not installed so wav2png will not work"
+import subprocess
+
+class AudioProcessingException(Exception):
+    pass
+
+class TestAudioFile(object):
+    """A class that mimics audiolab.sndfile but generates noise instead of reading
+    a wave file. Additionally it can be told to have a "broken" header and thus crashing
+    in the middle of the file. Also useful for testing ultra-short files of 20 samples."""
+    def __init__(self, num_frames, has_broken_header=False):
+        self.seekpoint = 0
+        self.nframes = num_frames
+        self.samplerate = 44100
+        self.channels = 1
+        self.has_broken_header = has_broken_header
+
+    def seek(self, seekpoint):
+        self.seekpoint = seekpoint
+
+    def read_frames(self, frames_to_read):
+        if self.has_broken_header and self.seekpoint + frames_to_read > self.nframes / 2:
+            raise RuntimeError()
+
+        num_frames_left = self.nframes - self.seekpoint
+        will_read = num_frames_left if num_frames_left < frames_to_read else frames_to_read
+        self.seekpoint += will_read
+        return numpy.random.random(will_read)*2 - 1
+
+
+def get_max_level(filename):
+    max_value = 0
+    buffer_size = 4096
+    audio_file = audiolab.Sndfile(filename, 'r')
+    n_samples_left = audio_file.nframes
+
+    while n_samples_left:
+        to_read = min(buffer_size, n_samples_left)
+
+        try:
+            samples = audio_file.read_frames(to_read)
+        except RuntimeError:
+            # this can happen with a broken header
+            break
+
+        # convert to mono by selecting left channel only
+        if audio_file.channels > 1:
+            samples = samples[:,0]
+
+        max_value = max(max_value, numpy.abs(samples).max())
+
+        n_samples_left -= to_read
+
+    audio_file.close()
+
+    return max_value
+
+class AudioProcessor(object):
+    """
+    The audio processor processes chunks of audio and calculates the spectral centroid and the peak
+    samples in that chunk of audio.
+    """
+    def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
+        max_level = get_max_level(input_filename)
+
+        self.audio_file = audiolab.Sndfile(input_filename, 'r')
+        self.fft_size = fft_size
+        self.window = window_function(self.fft_size)
+        self.spectrum_range = None
+        self.lower = 100
+        self.higher = 22050
+        self.lower_log = math.log10(self.lower)
+        self.higher_log = math.log10(self.higher)
+        self.clip = lambda val, low, high: min(high, max(low, val))
+
+        # figure out the maximum FFT magnitude by taking the FFT of a DC signal
+        fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
+        max_fft = (numpy.abs(fft)).max()
+        # set the scale to normalized audio and normalized FFT
+        self.scale = 1.0/max_level/max_fft if max_level > 0 else 1
+
+    def read(self, start, size, resize_if_less=False):
+        """ read size samples starting at start, if resize_if_less is True and less than size
+        samples are read, resize the array to size and fill with zeros """
+
+        # number of zeros to add to start and end of the buffer
+        add_to_start = 0
+        add_to_end = 0
+
+        if start < 0:
+            # the first FFT window starts centered around zero
+            if size + start <= 0:
+                return numpy.zeros(size) if resize_if_less else numpy.array([])
+            else:
+                self.audio_file.seek(0)
+
+                add_to_start = -start # remember: start is negative!
+                to_read = size + start
+
+                if to_read > self.audio_file.nframes:
+                    add_to_end = to_read - self.audio_file.nframes
+                    to_read = self.audio_file.nframes
+        else:
+            self.audio_file.seek(start)
+
+            to_read = size
+            if start + to_read >= self.audio_file.nframes:
+                to_read = self.audio_file.nframes - start
+                add_to_end = size - to_read
+
+        try:
+            samples = self.audio_file.read_frames(to_read)
+        except RuntimeError:
+            # this can happen for wave files with broken headers...
+            return numpy.zeros(size) if resize_if_less else numpy.zeros(2)
+
+        # convert to mono by selecting left channel only
+        if self.audio_file.channels > 1:
+            samples = samples[:,0]
+
+        if resize_if_less and (add_to_start > 0 or add_to_end > 0):
+            if add_to_start > 0:
+                samples = numpy.concatenate((numpy.zeros(add_to_start), samples), axis=0)
+
+            if add_to_end > 0:
+                samples = numpy.resize(samples, size)
+                samples[size - add_to_end:] = 0
+
+        return samples
+
+
+    def spectral_centroid(self, seek_point, spec_range=110.0):
+        """ starting at seek_point read fft_size samples, and calculate the spectral centroid """
+
+        samples = self.read(seek_point - self.fft_size/2, self.fft_size, True)
+
+        samples *= self.window
+        fft = numpy.fft.rfft(samples)
+        spectrum = self.scale * numpy.abs(fft) # normalized abs(FFT) between 0 and 1
+        length = numpy.float64(spectrum.shape[0])
+
+        # scale the dB spectrum from [-spec_range dB ... 0 dB] to [0..1]
+        db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range
+
+        energy = spectrum.sum()
+        spectral_centroid = 0
+
+        if energy > 1e-60:
+            # calculate the spectral centroid
+
+            if self.spectrum_range is None:
+                self.spectrum_range = numpy.arange(length)
+
+            spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5
+
+            # clip > log10 > scale between 0 and 1
+            spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log)
+
+        return (spectral_centroid, db_spectrum)
+
+
+    def peaks(self, start_seek, end_seek):
+        """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
+        in that range. Returns that pair in the order they were found. So if min was found first,
+        it returns (min, max) else the other way around. """
+
+        # larger blocksizes are faster but take more mem...
+        # Aha, Watson, a clue, a trade-off!
+        block_size = 4096
+
+        max_index = -1
+        max_value = -1
+        min_index = -1
+        min_value = 1
+
+        if start_seek < 0:
+            start_seek = 0
+
+        if end_seek > self.audio_file.nframes:
+            end_seek = self.audio_file.nframes
+
+        if end_seek <= start_seek:
+            samples = self.read(start_seek, 1)
+            return (samples[0], samples[0])
+
+        if block_size > end_seek - start_seek:
+            block_size = end_seek - start_seek
+
+        for i in range(start_seek, end_seek, block_size):
+            samples = self.read(i, block_size)
+
+            local_max_index = numpy.argmax(samples)
+            local_max_value = samples[local_max_index]
+
+            if local_max_value > max_value:
+                max_value = local_max_value
+                max_index = local_max_index
+
+            local_min_index = numpy.argmin(samples)
+            local_min_value = samples[local_min_index]
+
+            if local_min_value < min_value:
+                min_value = local_min_value
+                min_index = local_min_index
+
+        return (min_value, max_value) if min_index < max_index else (max_value, min_value)
+
+
+def interpolate_colors(colors, flat=False, num_colors=256):
+    """ given a list of colors, create a larger list of colors interpolating
+    the first one. If flatten is True a list of numers will be returned. If
+    False, a list of (r,g,b) tuples. num_colors is the number of colors wanted
+    in the final list """
+
+    palette = []
+
+    for i in range(num_colors):
+        index = (i * (len(colors) - 1))/(num_colors - 1.0)
+        index_int = int(index)
+        alpha = index - float(index_int)
+
+        if alpha > 0:
+            r = (1.0 - alpha) * colors[index_int][0] + alpha * colors[index_int + 1][0]
+            g = (1.0 - alpha) * colors[index_int][1] + alpha * colors[index_int + 1][1]
+            b = (1.0 - alpha) * colors[index_int][2] + alpha * colors[index_int + 1][2]
+        else:
+            r = (1.0 - alpha) * colors[index_int][0]
+            g = (1.0 - alpha) * colors[index_int][1]
+            b = (1.0 - alpha) * colors[index_int][2]
+
+        if flat:
+            palette.extend((int(r), int(g), int(b)))
+        else:
+            palette.append((int(r), int(g), int(b)))
+
+    return palette
+
+
+def desaturate(rgb, amount):
+    """
+        desaturate colors by amount
+        amount == 0, no change
+        amount == 1, grey
+    """
+    luminosity = sum(rgb) / 3.0
+    desat = lambda color: color - amount * (color - luminosity)
+
+    return tuple(map(int, map(desat, rgb)))
+
+
+class WaveformImage(object):
+    """
+    Given peaks and spectral centroids from the AudioProcessor, this class will construct
+    a wavefile image which can be saved as PNG.
+    """
+    def __init__(self, image_width, image_height, palette=1):
+        if image_height % 2 == 0:
+            raise AudioProcessingException, "Height should be uneven: images look much better at uneven height"
+
+        if palette == 1:
+            background_color = (0,0,0)
+            colors = [
+                        (50,0,200),
+                        (0,220,80),
+                        (255,224,0),
+                        (255,70,0),
+                     ]
+        elif palette == 2:
+            background_color = (0,0,0)
+            colors = [self.color_from_value(value/29.0) for value in range(0,30)]
+        elif palette == 3:
+            background_color = (213, 217, 221)
+            colors = map( partial(desaturate, amount=0.7), [
+                        (50,0,200),
+                        (0,220,80),
+                        (255,224,0),
+                     ])
+        elif palette == 4:
+            background_color = (213, 217, 221)
+            colors = map( partial(desaturate, amount=0.8), [self.color_from_value(value/29.0) for value in range(0,30)])
+
+        self.image = Image.new("RGB", (image_width, image_height), background_color)
+
+        self.image_width = image_width
+        self.image_height = image_height
+
+        self.draw = ImageDraw.Draw(self.image)
+        self.previous_x, self.previous_y = None, None
+
+        self.color_lookup = interpolate_colors(colors)
+        self.pix = self.image.load()
+
+    def color_from_value(self, value):
+        """ given a value between 0 and 1, return an (r,g,b) tuple """
+
+        return ImageColor.getrgb("hsl(%d,%d%%,%d%%)" % (int( (1.0 - value) * 360 ), 80, 50))
+
+    def draw_peaks(self, x, peaks, spectral_centroid):
+        """ draw 2 peaks at x using the spectral_centroid for color """
+
+        y1 = self.image_height * 0.5 - peaks[0] * (self.image_height - 4) * 0.5
+        y2 = self.image_height * 0.5 - peaks[1] * (self.image_height - 4) * 0.5
+
+        line_color = self.color_lookup[int(spectral_centroid*255.0)]
+
+        if self.previous_y is not None:
+            self.draw.line([self.previous_x, self.previous_y, x, y1, x, y2], line_color)
+        else:
+            self.draw.line([x, y1, x, y2], line_color)
+
+        self.previous_x, self.previous_y = x, y2
+
+        self.draw_anti_aliased_pixels(x, y1, y2, line_color)
+
+    def draw_anti_aliased_pixels(self, x, y1, y2, color):
+        """ vertical anti-aliasing at y1 and y2 """
+
+        y_max = max(y1, y2)
+        y_max_int = int(y_max)
+        alpha = y_max - y_max_int
+
+        if alpha > 0.0 and alpha < 1.0 and y_max_int + 1 < self.image_height:
+            current_pix = self.pix[x, y_max_int + 1]
+
+            r = int((1-alpha)*current_pix[0] + alpha*color[0])
+            g = int((1-alpha)*current_pix[1] + alpha*color[1])
+            b = int((1-alpha)*current_pix[2] + alpha*color[2])
+
+            self.pix[x, y_max_int + 1] = (r,g,b)
+
+        y_min = min(y1, y2)
+        y_min_int = int(y_min)
+        alpha = 1.0 - (y_min - y_min_int)
+
+        if alpha > 0.0 and alpha < 1.0 and y_min_int - 1 >= 0:
+            current_pix = self.pix[x, y_min_int - 1]
+
+            r = int((1-alpha)*current_pix[0] + alpha*color[0])
+            g = int((1-alpha)*current_pix[1] + alpha*color[1])
+            b = int((1-alpha)*current_pix[2] + alpha*color[2])
+
+            self.pix[x, y_min_int - 1] = (r,g,b)
+
+    def save(self, filename):
+        # brighten the horizontal "zero" line in the middle of the image
+        a = 25
+        for x in range(self.image_width):
+            self.pix[x, self.image_height/2] = tuple(map(lambda p: p+a, self.pix[x, self.image_height/2]))
+
+        self.image.save(filename)
+
+
+class SpectrogramImage(object):
+    """
+    Given spectra from the AudioProcessor, this class will construct a spectrogram image which
+    can be saved as JPEG.
+    """
+    def __init__(self, image_width, image_height, fft_size):
+        self.image_width = image_width
+        self.image_height = image_height
+        self.fft_size = fft_size
+
+        self.image = Image.new("RGBA", (image_height, image_width))
+
+        colors = [
+            (0, 0, 0, 0),
+            (58/4, 68/4, 65/4, 255),
+            (80/2, 100/2, 153/2, 255),
+            (90, 180, 100, 255),
+            (224, 224, 44, 255),
+            (255, 60, 30, 255),
+            (255, 255, 255, 255)
+         ]
+        self.palette = interpolate_colors(colors)
+
+        # generate the lookup which translates y-coordinate to fft-bin
+        self.y_to_bin = []
+        f_min = 100.0
+        f_max = 22050.0
+        y_min = math.log10(f_min)
+        y_max = math.log10(f_max)
+        for y in range(self.image_height):
+            freq = math.pow(10.0, y_min + y / (image_height - 1.0) *(y_max - y_min))
+            bin = freq / 22050.0 * (self.fft_size/2 + 1)
+
+            if bin < self.fft_size/2:
+                alpha = bin - int(bin)
+
+                self.y_to_bin.append((int(bin), alpha * 255))
+
+        # this is a bit strange, but using image.load()[x,y] = ... is
+        # a lot slower than using image.putdata and then rotating the image,
+        # so we store all the pixels in an array and then create the image when saving
+        self.pixels = []
+
+    def draw_spectrum(self, x, spectrum):
+        # for all frequencies, draw the pixels
+        for (index, alpha) in self.y_to_bin:
+            self.pixels.append( self.palette[int((255.0-alpha) * spectrum[index] + alpha * spectrum[index + 1])] )
+
+        # if the FFT is too small to fill up the image, fill with black to the top
+        for y in range(len(self.y_to_bin), self.image_height): #@UnusedVariable
+            self.pixels.append(self.palette[0])
+
+    def save(self, filename, quality=80):
+        assert filename.lower().endswith(".jpg")
+        self.image.putdata(self.pixels)
+        self.image.transpose(Image.ROTATE_90).save(filename, quality=quality)
+
+
+def create_wave_images(input_filename, output_filename_w, output_filename_s, image_width, image_height, fft_size, progress_callback=None):
+    """
+    Utility function for creating both waveform and spectrogram images from an audio input file.
+    """
+    processor = AudioProcessor(input_filename, fft_size, numpy.hanning)
+    samples_per_pixel = processor.audio_file.nframes / float(image_width)
+
+    waveform = WaveformImage(image_width, image_height)
+    spectrogram = SpectrogramImage(image_width, image_height, fft_size)
+
+    for x in range(image_width):
+
+        if progress_callback and x % (image_width/10) == 0:
+            progress_callback((x*100)/image_width)
+
+        seek_point = int(x * samples_per_pixel)
+        next_seek_point = int((x + 1) * samples_per_pixel)
+
+        (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
+        peaks = processor.peaks(seek_point, next_seek_point)
+
+        waveform.draw_peaks(x, peaks, spectral_centroid)
+        spectrogram.draw_spectrum(x, db_spectrum)
+
+    if progress_callback:
+        progress_callback(100)
+
+    waveform.save(output_filename_w)
+    spectrogram.save(output_filename_s)
+
+
+class NoSpaceLeftException(Exception):
+    pass
+
+def convert_to_pcm(input_filename, output_filename):
+    """
+    converts mp3, ogg or flac input to pcm audio (returns False for other types)
+    """
+
+    if not os.path.exists(input_filename):
+        raise AudioProcessingException, "file %s does not exist" % input_filename
+
+    sound_type = get_sound_type(input_filename)
+
+    if sound_type == "mp3":
+        cmd = ["lame", "--decode", input_filename, output_filename]
+    elif sound_type == "ogg":
+        cmd = ["oggdec", input_filename, "-o", output_filename]
+    elif sound_type == "flac":
+        cmd = ["flac", "-f", "-d", "-s", "-o", output_filename, input_filename]
+    else:
+        return False
+
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdout, stderr) = process.communicate()
+
+    if process.returncode != 0 or not os.path.exists(output_filename):
+        if "No space left on device" in stderr + " " + stdout:
+            raise NoSpaceLeftException
+        raise AudioProcessingException, "failed converting to pcm data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout
+
+    return True
+
+
+def stereofy_and_find_info(stereofy_executable_path, input_filename, output_filename):
+    """
+    converts a pcm wave file to two channel, 16 bit integer samples and
+    extracts duration/channels/samplerate/bitdepth info from the tool output
+    """
+
+    if not os.path.exists(input_filename):
+        raise AudioProcessingException, "file %s does not exist" % input_filename
+
+    cmd = [stereofy_executable_path, "--input", input_filename, "--output", output_filename]
+
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdout, stderr) = process.communicate()
+
+    if process.returncode != 0 or not os.path.exists(output_filename):
+        if "No space left on device" in stderr + " " + stdout:
+            raise NoSpaceLeftException
+        raise AudioProcessingException, "failed calling stereofy data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout
+
+    stdout = (stdout + " " + stderr).replace("\n", " ")
+
+    duration = 0
+    m = re.match(r".*#duration (?P<duration>[\d\.]+).*",  stdout)
+    if m is not None:
+        duration = float(m.group("duration"))
+
+    channels = 0
+    m = re.match(r".*#channels (?P<channels>\d+).*", stdout)
+    if m is not None:
+        channels = float(m.group("channels"))
+
+    samplerate = 0
+    m = re.match(r".*#samplerate (?P<samplerate>\d+).*", stdout)
+    if m is not None:
+        samplerate = float(m.group("samplerate"))
+
+    bitdepth = None
+    m = re.match(r".*#bitdepth (?P<bitdepth>\d+).*", stdout)
+    if m is not None:
+        bitdepth = float(m.group("bitdepth"))
+
+    bitrate = (os.path.getsize(input_filename) * 8.0) / 1024.0 / duration if duration > 0 else 0
+
+    return dict(duration=duration, channels=channels, samplerate=samplerate, bitrate=bitrate, bitdepth=bitdepth)
+
+
+def convert_to_mp3(input_filename, output_filename, quality=70):
+    """
+    converts the incoming wave file to an mp3 file
+    """
+
+    if not os.path.exists(input_filename):
+        raise AudioProcessingException, "file %s does not exist" % input_filename
+
+    command = ["lame", "--silent", "--abr", str(quality), input_filename, output_filename]
+
+    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdout, stderr) = process.communicate()
+
+    if process.returncode != 0 or not os.path.exists(output_filename):
+        raise AudioProcessingException, stdout
+
+def convert_to_ogg(input_filename, output_filename, quality=1):
+    """
+    converts the incoming wave file to an ogg file
+    """
+
+    if not os.path.exists(input_filename):
+        raise AudioProcessingException, "file %s does not exist" % input_filename
+
+    command = ["oggenc", "-q", str(quality), input_filename, "-o", output_filename]
+
+    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdout, stderr) = process.communicate()
+
+    if process.returncode != 0 or not os.path.exists(output_filename):
+        raise AudioProcessingException, stdout
+
+def convert_using_ffmpeg(input_filename, output_filename):
+    """
+    converts the incoming wave file to stereo pcm using fffmpeg
+    """
+    TIMEOUT = 3 * 60
+    def  alarm_handler(signum, frame):
+        raise AudioProcessingException, "timeout while waiting for ffmpeg"
+
+    if not os.path.exists(input_filename):
+        raise AudioProcessingException, "file %s does not exist" % input_filename
+
+    command = ["ffmpeg", "-y", "-i", input_filename, "-ac","1","-acodec", "pcm_s16le", "-ar", "44100", output_filename]
+
+    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    signal.signal(signal.SIGALRM,alarm_handler)
+    signal.alarm(TIMEOUT)
+    (stdout, stderr) = process.communicate()
+    signal.alarm(0)
+    if process.returncode != 0 or not os.path.exists(output_filename):
+        raise AudioProcessingException, stdout
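For reference, a minimal usage sketch of the new extlib module (file names are hypothetical; audiolab, numpy and PIL must be installed; the import path uses the symlink added below):

    from mediagoblin.media_types.audio import audioprocessing

    # Render a waveform PNG and a spectrogram JPEG from a PCM wav.
    # WaveformImage insists on an odd height, and SpectrogramImage.save()
    # asserts a .jpg destination.
    audioprocessing.create_wave_images(
        'sample.wav',       # hypothetical input, readable by audiolab
        'waveform.png',
        'spectrogram.jpg',
        500, 171,           # image_width, image_height
        2048)               # fft_size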
index 452d974571cd489b07ccb570a3c39beaa6b16133..b429677c7554a6bf15dafbf0ab822b2e19d65729 100644 (file)
@@ -65,6 +65,14 @@ base_url = string(default="/mgoblin_media/")
 storage_class = string(default="mediagoblin.storage.filestorage:BasicFileStorage")
 base_dir = string(default="%(here)s/user_dev/media/queue")
 
+[media:medium]
+max_width = integer(default=640)
+max_height = integer(default=640)
+
+[media:thumb]
+max_width = integer(default=180)
+max_height = integer(default=180)
+
 [media_type:mediagoblin.media_types.video]
 # Should we keep the original file?
 keep_original = boolean(default=False)
@@ -72,6 +80,7 @@ keep_original = boolean(default=False)
 [media_type:mediagoblin.media_types.audio]
 # vorbisenc qualiy
 quality = float(default=0.3)
+create_spectrogram = boolean(default=False)
 
 
 [beaker.cache]
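These settings are consumed by process_audio(); a sketch abridged from the changes further down:

    from mediagoblin import mg_globals as mgg

    audio_config = mgg.global_config['media_type:mediagoblin.media_types.audio']

    if audio_config['create_spectrogram']:
        medium_width = mgg.global_config['media:medium']['max_width']    # default 640
        thumb_size = (
            mgg.global_config['media:thumb']['max_width'],               # default 180
            mgg.global_config['media:thumb']['max_height'])              # default 180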
index f698b97ac88ff525597f458cc8edec6796a03367..75184c1fffc73bdbc6edf20f2596074cd035b7f6 100644 (file)
@@ -24,7 +24,16 @@ from mediagoblin.media_types.ascii import asciitoimage
 
 _log = logging.getLogger(__name__)
 
+SUPPORTED_EXTENSIONS = ['txt', 'asc', 'nfo']
+
 def sniff_handler(media_file, **kw):
+    if kw.get('media') is not None:
+        name, ext = os.path.splitext(kw['media'].filename)
+        clean_ext = ext[1:].lower()
+
+        if clean_ext in SUPPORTED_EXTENSIONS:
+            return True
+
     return False
 
 def process_ascii(entry):
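A hedged sanity check of the new extension sniffing; FakeMedia is a stand-in for whatever uploaded-file wrapper the submit view passes as the 'media' keyword:

    class FakeMedia(object):
        filename = 'art.nfo'

    assert sniff_handler(None, media=FakeMedia())   # 'nfo' is in SUPPORTED_EXTENSIONS
    assert not sniff_handler(None)                  # no 'media' kwarg -> False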
diff --git a/mediagoblin/media_types/audio/audioprocessing.py b/mediagoblin/media_types/audio/audioprocessing.py
new file mode 120000 (symlink)
index 0000000..c5e3c52
--- /dev/null
@@ -0,0 +1 @@
+../../../extlib/freesound/audioprocessing.py
\ No newline at end of file
index 7aa7ace8fdb2e07c35ed750b0957fad5cb1eac98..6769f60586046a69b7885b981a62e5429742b6de 100644 (file)
@@ -21,9 +21,10 @@ import os
 from mediagoblin import mg_globals as mgg
 from mediagoblin.processing import create_pub_filepath
 
-from mediagoblin.media_types.audio.transcoders import AudioTranscoder
+from mediagoblin.media_types.audio.transcoders import AudioTranscoder, \
+    AudioThumbnailer
 
-_log = logging.getLogger()
+_log = logging.getLogger(__name__)
 
 def sniff_handler(media_file, **kw):
     transcoder = AudioTranscoder()
@@ -33,7 +34,9 @@ def sniff_handler(media_file, **kw):
         if data.is_audio == True and data.is_video == False:
             return True
     except:
-        return False
+        pass
+
+    return False
 
 def process_audio(entry):
     audio_config = mgg.global_config['media_type:mediagoblin.media_types.audio']
@@ -51,10 +54,9 @@ def process_audio(entry):
             original=os.path.splitext(
                 queued_filepath[-1])[0]))
 
-    ogg_tmp = tempfile.NamedTemporaryFile()
+    transcoder = AudioTranscoder()
 
-    with ogg_tmp:
-        transcoder = AudioTranscoder()
+    with tempfile.NamedTemporaryFile() as ogg_tmp:
 
         transcoder.transcode(
             queued_filename,
@@ -72,11 +74,54 @@ def process_audio(entry):
         entry.media_data['audio'] = {
             u'length': int(data.audiolength)}
 
-    thumbnail_tmp = tempfile.NamedTemporaryFile()
-
-    with thumbnail_tmp:
+    if audio_config['create_spectrogram']:
+        spectrogram_filepath = create_pub_filepath(
+            entry,
+            '{original}-spectrogram.jpg'.format(
+                original=os.path.splitext(
+                    queued_filepath[-1])[0]))
+
+        with tempfile.NamedTemporaryFile(suffix='.wav') as wav_tmp:
+            _log.info('Creating WAV source for spectrogram')
+            transcoder.transcode(
+                queued_filename,
+                wav_tmp.name,
+                mux_string='wavenc')
+
+            thumbnailer = AudioThumbnailer()
+
+            with tempfile.NamedTemporaryFile(suffix='.jpg') as spectrogram_tmp:
+                thumbnailer.spectrogram(
+                    wav_tmp.name,
+                    spectrogram_tmp.name,
+                    width=mgg.global_config['media:medium']['max_width'])
+
+                _log.debug('Saving spectrogram...')
+                mgg.public_store.get_file(spectrogram_filepath, 'wb').write(
+                    spectrogram_tmp.read())
+
+                entry.media_files['spectrogram'] = spectrogram_filepath
+
+                with tempfile.NamedTemporaryFile(suffix='.jpg') as thumb_tmp:
+                    thumbnailer.thumbnail_spectrogram(
+                        spectrogram_tmp.name,
+                        thumb_tmp.name,
+                        (mgg.global_config['media:thumb']['max_width'],
+                         mgg.global_config['media:thumb']['max_height']))
+
+                    thumb_filepath = create_pub_filepath(
+                        entry,
+                        '{original}-thumbnail.jpg'.format(
+                            original=os.path.splitext(
+                                queued_filepath[-1])[0]))
+
+                    mgg.public_store.get_file(thumb_filepath, 'wb').write(
+                        thumb_tmp.read())
+
+                    entry.media_files['thumb'] = thumb_filepath
+    else:
         entry.media_files['thumb'] = ['fake', 'thumb', 'path.jpg']
-
+
     mgg.queue_store.delete_file(queued_filepath)
 
     entry.save()
index c563496443a56bc1ee050b1d03113098c1e3e4ff..7649309c3db722a168fb45907f103361992a2ba9 100644 (file)
 
 import pdb
 import logging
+from PIL import Image
 
 from mediagoblin.processing import BadMediaFail
+from mediagoblin.media_types.audio import audioprocessing
 
 
 _log = logging.getLogger(__name__)
@@ -56,6 +58,73 @@ try:
 except ImportError:
     raise Exception('gst/pygst >= 0.10 could not be imported')
 
+import numpy
+
+class AudioThumbnailer(object):
+    def __init__(self):
+        _log.info('Initializing {0}'.format(self.__class__.__name__))
+
+    def spectrogram(self, src, dst, **kw):
+        width = kw['width']
+        height = int(kw.get('height', float(width) * 0.3))
+        fft_size = kw.get('fft_size', 2048)
+        callback = kw.get('progress_callback')
+
+        processor = audioprocessing.AudioProcessor(
+            src,
+            fft_size,
+            numpy.hanning)
+
+        samples_per_pixel = processor.audio_file.nframes / float(width)
+
+        spectrogram = audioprocessing.SpectrogramImage(width, height, fft_size)
+
+        for x in range(width):
+            if callback and x % (width / 10) == 0:
+                callback((x * 100) / width)
+
+            seek_point = int(x * samples_per_pixel)
+
+            (spectral_centroid, db_spectrum) = processor.spectral_centroid(
+                seek_point)
+
+            spectrogram.draw_spectrum(x, db_spectrum)
+
+        if callback:
+            callback(100)
+
+        spectrogram.save(dst)
+
+    def thumbnail_spectrogram(self, src, dst, thumb_size):
+        '''
+        Takes a spectrogram and creates a thumbnail from it
+        '''
+        if not (type(thumb_size) == tuple and len(thumb_size) == 2):
+            raise Exception('size argument should be a tuple(width, height)')
+
+        im = Image.open(src)
+
+        im_w, im_h = [float(i) for i in im.size]
+        th_w, th_h = [float(i) for i in thumb_size]
+
+        wadsworth_position = im_w * 0.3
+
+        start_x = max((
+                wadsworth_position - (th_w / 2.0),
+                0.0))
+
+        stop_x = start_x + (im_h * (th_w / th_h))
+
+        th = im.crop((
+                int(start_x), 0,
+                int(stop_x), int(im_h)))
+
+        if th.size[0] > th_w or th.size[1] > th_h:
+            th.thumbnail(thumb_size, Image.ANTIALIAS)
+
+        th.save(dst)
+
+
 class AudioTranscoder(object):
     def __init__(self):
         _log.info('Initializing {0}'.format(self.__class__.__name__))
@@ -103,17 +172,21 @@ class AudioTranscoder(object):
 
         quality = kw.get('quality', 0.3)
 
+        mux_string = kw.get(
+            'mux_string',
+            'vorbisenc quality={0} ! webmmux'.format(quality))
+
         # Set up pipeline
         self.pipeline = gst.parse_launch(
             'filesrc location="{src}" ! ' 
             'decodebin2 ! queue ! audiorate tolerance={tolerance} ! '
             'audioconvert ! audio/x-raw-float,channels=2 ! '
-            'vorbisenc quality={quality} ! webmmux ! '
+            '{mux_string} ! '
             'progressreport silent=true ! '
             'filesink location="{dst}"'.format(
                 src=src,
                 tolerance=80000000,
-                quality=quality,
+                mux_string=mux_string,
                 dst=dst))
 
         self.bus = self.pipeline.get_bus()
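The net effect in process_audio(): one AudioTranscoder now produces both the Vorbis/WebM medium and the WAV that feeds the spectrogram. A sketch using the names from the hunks above:

    transcoder = AudioTranscoder()

    # default mux_string is 'vorbisenc quality={0} ! webmmux'
    transcoder.transcode(queued_filename, ogg_tmp.name,
                         quality=audio_config['quality'])

    # plain WAV output for the spectrogram step
    transcoder.transcode(queued_filename, wav_tmp.name, mux_string='wavenc')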
@@ -141,6 +214,9 @@ class AudioTranscoder(object):
             self.halt()
 
     def halt(self):
+        if getattr(self, 'pipeline', False):
+            self.pipeline.set_state(gst.STATE_NULL)
+            del self.pipeline
         _log.info('Quitting MainLoop gracefully...')
         gobject.idle_add(self._loop.quit)
 
@@ -149,8 +225,12 @@ if __name__ == '__main__':
     logging.basicConfig()
     _log.setLevel(logging.INFO)
 
-    transcoder = AudioTranscoder()
-    data = transcoder.discover(sys.argv[1])
-    res = transcoder.transcode(*sys.argv[1:3])
+    #transcoder = AudioTranscoder()
+    #data = transcoder.discover(sys.argv[1])
+    #res = transcoder.transcode(*sys.argv[1:3])
+
+    thumbnailer = AudioThumbnailer()
+
+    thumbnailer.spectrogram(*sys.argv[1:], width=640)
 
     pdb.set_trace()
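For the curious, the "Wadsworth's Constant" reference: thumbnail_spectrogram() starts its crop window at 30% of the source width. A minimal usage sketch (file names hypothetical):

    thumbnailer = AudioThumbnailer()

    # full-width spectrogram from a WAV source
    thumbnailer.spectrogram('source.wav', 'spectrogram.jpg', width=640)

    # 180x180 crop beginning at 30% of the image width
    thumbnailer.thumbnail_spectrogram('spectrogram.jpg', 'thumb.jpg', (180, 180))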
index 364a5afae462f0bc7165afd32894d14be6e8d8f3..28cde2aa7172a74ee302d14aed4f5260993bb921 100644 (file)
@@ -42,7 +42,7 @@ def sniff_handler(media_file, **kw):
             _log.info('Found file extension in supported filetypes')
             return True
         else:
-            _log.debug('Media present, extension not found in {1}'.format(
+            _log.debug('Media present, extension not found in {0}'.format(
                     SUPPORTED_FILETYPES))
     else:
         _log.warning('Need additional information (keyword argument \'media\')'
index 1890ef0cc90bd67bd8c3e0bbcdd32c6c3cc2ff4c..d2562e3b366681e69b560fc8b2762b09d74be138 100644 (file)
@@ -29,6 +29,18 @@ _log = logging.getLogger(__name__)
 _log.setLevel(logging.DEBUG)
 
 def sniff_handler(media_file, **kw):
+    transcoder = transcoders.VideoTranscoder()
+    try:
+        data = transcoder.discover(media_file.name)
+
+        _log.debug('Discovered: {0}'.format(data.__dict__))
+
+        if data.is_video:
+            return True
+    except:
+        _log.error('Exception caught when trying to discover {0}'.format(
+                kw.get('media')))
+
     return False
 
 def process_video(entry):
@@ -61,7 +73,8 @@ def process_video(entry):
 
     with tmp_dst:
         # Transcode queued file to a VP8/vorbis file that fits in a 640x640 square
-        transcoder = transcoders.VideoTranscoder(queued_filename, tmp_dst.name)
+        transcoder = transcoders.VideoTranscoder()
+        transcoder.transcode(queued_filename, tmp_dst.name)
 
         # Push transcoded video to public storage
         _log.debug('Saving medium...')
index 903bd81028dbee8280b6bcdd4eb9847e0585a4c2..6c2e885e26f0390f3c0f5b8803b5646cc33aa923 100644 (file)
@@ -25,8 +25,6 @@ import pdb
 import urllib
 
 _log = logging.getLogger(__name__)
-logging.basicConfig()
-_log.setLevel(logging.DEBUG)
 
 CPU_COUNT = 2
 try:
@@ -340,10 +338,15 @@ class VideoTranscoder:
        that it was refined afterwards and therefore is done more
        correctly.
     '''
-    def __init__(self, src, dst, **kwargs):
+    def __init__(self):
         _log.info('Initializing VideoTranscoder...')
 
         self.loop = gobject.MainLoop()
+
+    def transcode(self, src, dst, **kwargs):
+        '''
+        Transcode a video file into a 'medium'-sized version.
+        '''
         self.source_path = src
         self.destination_path = dst
 
@@ -357,6 +360,30 @@ class VideoTranscoder:
         self._setup()
         self._run()
 
+    def discover(self, src):
+        '''
+        Discover properties about a media file
+        '''
+        _log.info('Discovering {0}'.format(src))
+
+        self.source_path = src
+        self._setup_discover(discovered_callback=self.__on_discovered)
+
+        self.discoverer.discover()
+
+        self.loop.run()
+
+        return self._discovered_data
+
+    def __on_discovered(self, data, is_media):
+        if not is_media:
+            self.__stop()
+            raise Exception('Could not discover {0}'.format(self.source_path))
+
+        self._discovered_data = data
+
+        self.__stop_mainloop()
+
     def _setup(self):
         self._setup_discover()
         self._setup_pipeline()
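After the refactor a single VideoTranscoder instance covers both sniffing and processing; a sketch (the path is hypothetical):

    transcoder = transcoders.VideoTranscoder()

    data = transcoder.discover('/tmp/clip.ogv')  # blocks on the gobject MainLoop
    if data.is_video:
        transcoder.transcode('/tmp/clip.ogv', '/tmp/clip.webm')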
@@ -369,12 +396,14 @@ class VideoTranscoder:
         _log.debug('Initializing MainLoop()')
         self.loop.run()
 
-    def _setup_discover(self):
+    def _setup_discover(self, **kw):
         _log.debug('Setting up discoverer')
         self.discoverer = discoverer.Discoverer(self.source_path)
 
         # Connect self.__discovered to the 'discovered' event
-        self.discoverer.connect('discovered', self.__discovered)
+        self.discoverer.connect(
+            'discovered',
+            kw.get('discovered_callback', self.__discovered))
 
     def __discovered(self, data, is_media):
         '''
@@ -614,14 +643,15 @@ class VideoTranscoder:
 
 if __name__ == '__main__':
     os.nice(19)
+    logging.basicConfig()
     from optparse import OptionParser
 
     parser = OptionParser(
-        usage='%prog [-v] -a [ video | thumbnail ] SRC DEST')
+        usage='%prog [-v] -a [ video | thumbnail | discover ] SRC [ DEST ]')
 
     parser.add_option('-a', '--action',
                       dest='action',
-                      help='One of "video" or "thumbnail"')
+                      help='One of "video", "discover" or "thumbnail"')
 
     parser.add_option('-v',
                       dest='verbose',
@@ -645,13 +675,18 @@ if __name__ == '__main__':
 
     _log.debug(args)
 
-    if not len(args) == 2:
+    if len(args) != 2 and options.action != 'discover':
         parser.print_help()
         sys.exit()
 
+    transcoder = VideoTranscoder()
+
     if options.action == 'thumbnail':
         VideoThumbnailer(*args)
     elif options.action == 'video':
         def cb(data):
             print('I\'m a callback!')
-        transcoder = VideoTranscoder(*args, progress_callback=cb)
+        transcoder.transcode(*args, progress_callback=cb)
+    elif options.action == 'discover':
+        print transcoder.discover(*args).__dict__
+
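The updated CLI then accepts, for example (invocations illustrative):

    python transcoders.py -a discover /tmp/clip.ogv
    python transcoders.py -a video /tmp/clip.ogv /tmp/clip.webm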
index 802a85c1c98942b85ee8b81ee72c010244ae9088..f130e323528330a802fe5a224239b872afdf9c6c 100644 (file)
 
 {% block mediagoblin_media %}
   <div class="audio-media">
-    <audio controls="controls"
+    {% if 'spectrogram' in media.media_files %}
+      <div class="audio-spectrogram">
+       <img src="{{ request.app.public_store.file_url(
+                     media.media_files.spectrogram) }}"
+            alt="Spectrogram" />
+      </div>
+    {% endif %}
+    <audio class="audio-player" controls="controls"
           preload="metadata">
       <source src="{{ request.app.public_store.file_url(
                       media.media_files.ogg) }}" type="video/webm; encoding=&quot;vorbis&quot;" />
index 217926a41807094cda9a19455ac533e747d9569a..53ba25d845d97e07a4f8c1e36decd16f10bab367 100644 (file)
@@ -231,7 +231,8 @@ class TestSubmission:
 
         context = template.TEMPLATE_TEST_CONTEXT['mediagoblin/submit/start.html']
         form = context['submit_form']
-        assert re.match(r'^Could not extract any file extension from ".*?"$', str(form.file.errors[0]))
+        assert 'Sorry, I don\'t support that file type :(' == \
+            str(form.file.errors[0])
         assert len(form.file.errors) == 1
 
         # NOTE: The following 2 tests will ultimately fail, but they