Commit | Line | Data |
---|---|---|
10085b77 JW |
1 | #!/usr/bin/env python |
2 | # processing.py -- various audio processing functions | |
3 | # Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG) | |
4 | # UNIVERSITAT POMPEU FABRA | |
5 | # | |
6 | # This program is free software: you can redistribute it and/or modify | |
7 | # it under the terms of the GNU Affero General Public License as | |
8 | # published by the Free Software Foundation, either version 3 of the | |
9 | # License, or (at your option) any later version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, | |
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | # GNU Affero General Public License for more details. | |
15 | # | |
16 | # You should have received a copy of the GNU Affero General Public License | |
17 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | # | |
19 | # Authors: | |
20 | # Bram de Jong <bram.dejong at domain.com where domain in gmail> | |
21 | # 2012, Joar Wandborg <first name at last name dot se> | |
22 | ||
23 | from PIL import Image, ImageDraw, ImageColor #@UnresolvedImport | |
24 | from functools import partial | |
25 | import math | |
26 | import numpy | |
27 | import os | |
28 | import re | |
29 | import signal | |
30 | ||
31 | ||
def get_sound_type(input_filename):
    """Return a normalized audio-type string derived from the file extension.

    The extension is lower-cased and stripped of its dot; the short aliases
    "fla" and "aif" are mapped to their canonical names "flac" and "aiff".
    """
    extension = os.path.splitext(input_filename.lower())[1].strip(".")

    # canonical names for the short extension variants
    aliases = {"fla": "flac", "aif": "aiff"}

    return aliases.get(extension, extension)
41 | ||
42 | ||
try:
    import scikits.audiolab as audiolab
except ImportError:
    # parenthesized form prints identically on Python 2 and stays valid on
    # Python 3, unlike the bare print statement used before
    print("WARNING: audiolab is not installed so wav2png will not work")
import subprocess
48 | ||
class AudioProcessingException(Exception):
    """Generic failure while decoding, analysing or converting an audio file."""
    pass
51 | ||
class TestAudioFile(object):
    """A class that mimics audiolab.sndfile but generates noise instead of reading
    a wave file. Additionally it can be told to have a "broken" header and thus crashing
    in the middle of the file. Also useful for testing ultra-short files of 20 samples."""

    def __init__(self, num_frames, has_broken_header=False):
        # current read position, advanced by read_frames() and moved by seek()
        self.seekpoint = 0
        # named "nframes" to match the audiolab.Sndfile attribute
        self.nframes = num_frames
        self.samplerate = 44100
        self.channels = 1
        self.has_broken_header = has_broken_header

    def seek(self, seekpoint):
        """Move the read position to an absolute frame offset."""
        self.seekpoint = seekpoint

    def read_frames(self, frames_to_read):
        """Return up to frames_to_read random samples in [-1, 1).

        Raises RuntimeError once a read would cross the middle of the file
        when the header is flagged as broken, mimicking a decoder failure.
        """
        # BUG FIX: the original referenced self.num_frames, an attribute that
        # was never assigned (__init__ stores the count as self.nframes), so
        # every call raised AttributeError.
        if self.has_broken_header and self.seekpoint + frames_to_read > self.nframes / 2:
            raise RuntimeError()

        num_frames_left = self.nframes - self.seekpoint
        will_read = num_frames_left if num_frames_left < frames_to_read else frames_to_read
        self.seekpoint += will_read
        return numpy.random.random(will_read) * 2 - 1
74 | ||
75 | ||
def get_max_level(filename):
    """Scan the whole audio file in chunks and return the maximum absolute
    sample value encountered (0 if nothing could be read)."""
    chunk_size = 4096
    peak = 0
    sound = audiolab.Sndfile(filename, 'r')
    frames_remaining = sound.nframes

    while frames_remaining:
        chunk = min(chunk_size, frames_remaining)

        try:
            frames = sound.read_frames(chunk)
        except RuntimeError:
            # a broken header can make reading fail mid-file; keep what we have
            break

        # convert to mono by selecting left channel only
        if sound.channels > 1:
            frames = frames[:, 0]

        peak = max(peak, numpy.abs(frames).max())

        frames_remaining -= chunk

    sound.close()

    return peak
102 | ||
class AudioProcessor(object):
    """
    The audio processor processes chunks of audio and calculates the spectral centroid and the peak
    samples in that chunk of audio.
    """
    def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
        # peak absolute sample value of the whole file, used below to normalize
        max_level = get_max_level(input_filename)

        self.audio_file = audiolab.Sndfile(input_filename, 'r')
        self.fft_size = fft_size
        # analysis window of fft_size samples (default: Hanning)
        self.window = window_function(self.fft_size)
        # lazily-built numpy.arange(len(spectrum)), filled in spectral_centroid()
        self.spectrum_range = None
        # frequency limits (Hz) between which the centroid is log-scaled
        self.lower = 100
        self.higher = 22050
        self.lower_log = math.log10(self.lower)
        self.higher_log = math.log10(self.higher)
        self.clip = lambda val, low, high: min(high, max(low, val))

        # figure out what the maximum value is for an FFT doing the FFT of a DC signal
        fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
        max_fft = (numpy.abs(fft)).max()
        # set the scale to normalized audio and normalized FFT
        self.scale = 1.0/max_level/max_fft if max_level > 0 else 1

    def read(self, start, size, resize_if_less=False):
        """ read size samples starting at start, if resize_if_less is True and less than size
        samples are read, resize the array to size and fill with zeros """

        # number of zeros to add to start and end of the buffer
        add_to_start = 0
        add_to_end = 0

        if start < 0:
            # the first FFT window starts centered around zero
            if size + start <= 0:
                # the whole window lies before the start of the file
                return numpy.zeros(size) if resize_if_less else numpy.array([])
            else:
                self.audio_file.seek(0)

                add_to_start = -start # remember: start is negative!
                to_read = size + start

                if to_read > self.audio_file.nframes:
                    add_to_end = to_read - self.audio_file.nframes
                    to_read = self.audio_file.nframes
        else:
            self.audio_file.seek(start)

            to_read = size
            if start + to_read >= self.audio_file.nframes:
                # window extends past the end of the file
                to_read = self.audio_file.nframes - start
                add_to_end = size - to_read

        try:
            samples = self.audio_file.read_frames(to_read)
        except RuntimeError:
            # this can happen for wave files with broken headers...
            return numpy.zeros(size) if resize_if_less else numpy.zeros(2)

        # convert to mono by selecting left channel only
        if self.audio_file.channels > 1:
            samples = samples[:,0]

        if resize_if_less and (add_to_start > 0 or add_to_end > 0):
            if add_to_start > 0:
                # NOTE(review): axis=1 on 1-D inputs is rejected by modern
                # numpy; older releases tolerated it — verify before upgrading
                samples = numpy.concatenate((numpy.zeros(add_to_start), samples), axis=1)

            if add_to_end > 0:
                samples = numpy.resize(samples, size)
                samples[size - add_to_end:] = 0

        return samples


    def spectral_centroid(self, seek_point, spec_range=110.0):
        """ starting at seek_point read fft_size samples, and calculate the spectral centroid """

        # window is centered on seek_point; edges are zero-padded by read()
        samples = self.read(seek_point - self.fft_size/2, self.fft_size, True)

        samples *= self.window
        fft = numpy.fft.rfft(samples)
        spectrum = self.scale * numpy.abs(fft) # normalized abs(FFT) between 0 and 1
        length = numpy.float64(spectrum.shape[0])

        # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
        db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range

        energy = spectrum.sum()
        spectral_centroid = 0

        if energy > 1e-60:
            # calculate the spectral centroid

            if self.spectrum_range == None:
                self.spectrum_range = numpy.arange(length)

            # energy-weighted mean bin index, mapped to Hz (Nyquist = samplerate/2)
            spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5

            # clip > log10 > scale between 0 and 1
            spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log)

        return (spectral_centroid, db_spectrum)


    def peaks(self, start_seek, end_seek):
        """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
        in that range. Returns that pair in the order they were found. So if min was found first,
        it returns (min, max) else the other way around. """

        # larger blocksizes are faster but take more mem...
        # Aha, Watson, a clue, a tradeof!
        block_size = 4096

        max_index = -1
        max_value = -1
        min_index = -1
        min_value = 1

        # clamp the requested range to the actual file length
        if start_seek < 0:
            start_seek = 0

        if end_seek > self.audio_file.nframes:
            end_seek = self.audio_file.nframes

        if end_seek <= start_seek:
            # degenerate range: use a single sample for both peaks
            samples = self.read(start_seek, 1)
            return (samples[0], samples[0])

        if block_size > end_seek - start_seek:
            block_size = end_seek - start_seek

        for i in range(start_seek, end_seek, block_size):
            samples = self.read(i, block_size)

            local_max_index = numpy.argmax(samples)
            local_max_value = samples[local_max_index]

            if local_max_value > max_value:
                max_value = local_max_value
                max_index = local_max_index

            local_min_index = numpy.argmin(samples)
            local_min_value = samples[local_min_index]

            if local_min_value < min_value:
                min_value = local_min_value
                min_index = local_min_index

        return (min_value, max_value) if min_index < max_index else (max_value, min_value)
252 | ||
253 | ||
def interpolate_colors(colors, flat=False, num_colors=256):
    """Expand a short list of colors into num_colors linearly interpolated ones.

    When flat is False the result is a list of (r, g, b) tuples; when True the
    channel values are flattened into one list of ints.
    """
    palette = []
    last = len(colors) - 1

    for step in range(num_colors):
        # fractional position of this palette entry within the input colors
        position = (step * last) / (num_colors - 1.0)
        left = int(position)
        weight = position - float(left)

        if weight > 0:
            lo, hi = colors[left], colors[left + 1]
            rgb = tuple(int((1.0 - weight) * a + weight * b) for a, b in zip(lo, hi))
        else:
            # exactly on an input color; weight is 0 so this is just int(channel)
            rgb = tuple(int((1.0 - weight) * channel) for channel in colors[left])

        if flat:
            palette.extend(rgb)
        else:
            palette.append(rgb)

    return palette
282 | ||
283 | ||
def desaturate(rgb, amount):
    """
    desaturate colors by amount
    amount == 0, no change
    amount == 1, grey
    """
    # grey point: the mean of the three channels
    luminosity = sum(rgb) / 3.0

    # pull each channel toward the grey point by the requested amount
    return tuple(int(channel - amount * (channel - luminosity)) for channel in rgb)
294 | ||
295 | ||
class WaveformImage(object):
    """
    Given peaks and spectral centroids from the AudioProcessor, this class will construct
    a wavefile image which can be saved as PNG.
    """
    def __init__(self, image_width, image_height, palette=1):
        """Create the canvas and build the 256-entry color lookup table.

        palette selects one of four color schemes: 1 and 2 draw on black,
        3 and 4 are desaturated variants on a light grey background.
        Raises AudioProcessingException for an even height or unknown palette.
        """
        if image_height % 2 == 0:
            # an uneven height gives the waveform a single center "zero" row
            raise AudioProcessingException("Height should be uneven: images look much better at uneven height")

        if palette == 1:
            background_color = (0,0,0)
            colors = [
                (50,0,200),
                (0,220,80),
                (255,224,0),
                (255,70,0),
            ]
        elif palette == 2:
            background_color = (0,0,0)
            colors = [self.color_from_value(value/29.0) for value in range(0,30)]
        elif palette == 3:
            background_color = (213, 217, 221)
            # list(...) keeps the palette indexable on Python 3 as well, where
            # map() returns a lazy iterator
            colors = list(map(partial(desaturate, amount=0.7), [
                (50,0,200),
                (0,220,80),
                (255,224,0),
            ]))
        elif palette == 4:
            background_color = (213, 217, 221)
            colors = list(map(partial(desaturate, amount=0.8), [self.color_from_value(value/29.0) for value in range(0,30)]))
        else:
            # previously an unknown palette left background_color/colors unbound
            # and crashed below with a confusing NameError
            raise AudioProcessingException("palette must be 1, 2, 3 or 4, got %s" % palette)

        self.image = Image.new("RGB", (image_width, image_height), background_color)

        self.image_width = image_width
        self.image_height = image_height

        self.draw = ImageDraw.Draw(self.image)
        # last drawn column/peak, used to connect consecutive columns
        self.previous_x, self.previous_y = None, None

        self.color_lookup = interpolate_colors(colors)
        self.pix = self.image.load()

    def color_from_value(self, value):
        """ given a value between 0 and 1, return an (r,g,b) tuple """

        return ImageColor.getrgb("hsl(%d,%d%%,%d%%)" % (int( (1.0 - value) * 360 ), 80, 50))

    def draw_peaks(self, x, peaks, spectral_centroid):
        """ draw 2 peaks at x using the spectral_centroid for color """

        # map the [-1, 1] peak values to vertical pixel positions
        y1 = self.image_height * 0.5 - peaks[0] * (self.image_height - 4) * 0.5
        y2 = self.image_height * 0.5 - peaks[1] * (self.image_height - 4) * 0.5

        line_color = self.color_lookup[int(spectral_centroid*255.0)]

        if self.previous_y is not None:
            # connect to the previous column so the waveform is continuous
            self.draw.line([self.previous_x, self.previous_y, x, y1, x, y2], line_color)
        else:
            self.draw.line([x, y1, x, y2], line_color)

        self.previous_x, self.previous_y = x, y2

        self.draw_anti_aliased_pixels(x, y1, y2, line_color)

    def draw_anti_aliased_pixels(self, x, y1, y2, color):
        """ vertical anti-aliasing at y1 and y2 """

        y_max = max(y1, y2)
        y_max_int = int(y_max)
        alpha = y_max - y_max_int

        if alpha > 0.0 and alpha < 1.0 and y_max_int + 1 < self.image_height:
            # blend the pixel just below the lower line endpoint
            current_pix = self.pix[x, y_max_int + 1]

            r = int((1-alpha)*current_pix[0] + alpha*color[0])
            g = int((1-alpha)*current_pix[1] + alpha*color[1])
            b = int((1-alpha)*current_pix[2] + alpha*color[2])

            self.pix[x, y_max_int + 1] = (r,g,b)

        y_min = min(y1, y2)
        y_min_int = int(y_min)
        alpha = 1.0 - (y_min - y_min_int)

        if alpha > 0.0 and alpha < 1.0 and y_min_int - 1 >= 0:
            # blend the pixel just above the upper line endpoint
            current_pix = self.pix[x, y_min_int - 1]

            r = int((1-alpha)*current_pix[0] + alpha*color[0])
            g = int((1-alpha)*current_pix[1] + alpha*color[1])
            b = int((1-alpha)*current_pix[2] + alpha*color[2])

            self.pix[x, y_min_int - 1] = (r,g,b)

    def save(self, filename):
        # brighten the center "zero" line a bit so it stays visible;
        # // keeps the row index integral on both Python 2 and 3
        a = 25
        for x in range(self.image_width):
            self.pix[x, self.image_height // 2] = tuple(map(lambda p: p + a, self.pix[x, self.image_height // 2]))

        self.image.save(filename)
396 | ||
397 | ||
class SpectrogramImage(object):
    """
    Given spectra from the AudioProcessor, this class will construct a wavefile image which
    can be saved as PNG.
    """
    def __init__(self, image_width, image_height, fft_size):
        self.image_width = image_width
        self.image_height = image_height
        self.fft_size = fft_size

        # the image is built transposed (height x width) and rotated on save:
        # writing rows via putdata() is much faster than per-pixel assignment
        self.image = Image.new("RGBA", (image_height, image_width))

        colors = [
            (0, 0, 0, 0),
            (58/4, 68/4, 65/4, 255),
            (80/2, 100/2, 153/2, 255),
            (90, 180, 100, 255),
            (224, 224, 44, 255),
            (255, 60, 30, 255),
            (255, 255, 255, 255)
        ]
        self.palette = interpolate_colors(colors)

        # generate the lookup which translates y-coordinate to fft-bin
        self.y_to_bin = []
        f_min = 100.0
        f_max = 22050.0
        y_min = math.log10(f_min)
        y_max = math.log10(f_max)
        for y in range(self.image_height):
            # log-spaced frequency for this row, between f_min and f_max
            freq = math.pow(10.0, y_min + y / (image_height - 1.0) *(y_max - y_min))
            # fractional bin position; renamed from "bin" to avoid shadowing the
            # builtin, and // keeps the arithmetic integral on Python 3 as well
            freq_bin = freq / 22050.0 * (self.fft_size // 2 + 1)

            if freq_bin < self.fft_size // 2:
                alpha = freq_bin - int(freq_bin)

                self.y_to_bin.append((int(freq_bin), alpha * 255))

        # this is a bit strange, but using image.load()[x,y] = ... is
        # a lot slower than using image.putdata and then rotating the image
        # so we store all the pixels in an array and then create the image when saving
        self.pixels = []

    def draw_spectrum(self, x, spectrum):
        # for all frequencies, draw the pixels: each row is a linear blend of
        # the two neighbouring fft bins, mapped through the color palette
        for (index, alpha) in self.y_to_bin:
            self.pixels.append( self.palette[int((255.0-alpha) * spectrum[index] + alpha * spectrum[index + 1])] )

        # if the FFT is too small to fill up the image, fill with black to the top
        for y in range(len(self.y_to_bin), self.image_height):
            self.pixels.append(self.palette[0])

    def save(self, filename, quality=80):
        # only JPEG output is supported (the quality parameter is JPEG-specific)
        assert filename.lower().endswith(".jpg")
        self.image.putdata(self.pixels)
        self.image.transpose(Image.ROTATE_90).save(filename, quality=quality)
454 | ||
455 | ||
def create_wave_images(input_filename, output_filename_w, output_filename_s, image_width, image_height, fft_size, progress_callback=None):
    """
    Utility function for creating both wavefile and spectrum images from an audio input file.

    progress_callback, when given, is called with an integer percentage
    roughly every 10% of the columns and once more with 100 at the end.
    """
    processor = AudioProcessor(input_filename, fft_size, numpy.hanning)
    samples_per_pixel = processor.audio_file.nframes / float(image_width)

    waveform = WaveformImage(image_width, image_height)
    spectrogram = SpectrogramImage(image_width, image_height, fft_size)

    # max(..., 1) guards against a ZeroDivisionError for images narrower than
    # 10 pixels (the original did x % (image_width/10) directly)
    progress_step = max(image_width // 10, 1)

    for x in range(image_width):

        if progress_callback and x % progress_step == 0:
            progress_callback((x * 100) // image_width)

        seek_point = int(x * samples_per_pixel)
        next_seek_point = int((x + 1) * samples_per_pixel)

        (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
        peaks = processor.peaks(seek_point, next_seek_point)

        waveform.draw_peaks(x, peaks, spectral_centroid)
        spectrogram.draw_spectrum(x, db_spectrum)

    if progress_callback:
        progress_callback(100)

    waveform.save(output_filename_w)
    spectrogram.save(output_filename_s)
485 | ||
486 | ||
class NoSpaceLeftException(Exception):
    """Raised when an external conversion tool reports 'No space left on device'."""
    pass
489 | ||
def convert_to_pcm(input_filename, output_filename):
    """
    converts any audio file type to pcm audio

    Returns True on success, False when the input extension is not one of the
    supported compressed formats (mp3/ogg/flac).

    Raises AudioProcessingException when the input file is missing or the
    external decoder fails, NoSpaceLeftException when the disk fills up.
    """

    if not os.path.exists(input_filename):
        # call-form raise works on Python 2 and 3 (the old comma form is 2-only)
        raise AudioProcessingException("file %s does not exist" % input_filename)

    sound_type = get_sound_type(input_filename)

    if sound_type == "mp3":
        cmd = ["lame", "--decode", input_filename, output_filename]
    elif sound_type == "ogg":
        cmd = ["oggdec", input_filename, "-o", output_filename]
    elif sound_type == "flac":
        cmd = ["flac", "-f", "-d", "-s", "-o", output_filename, input_filename]
    else:
        return False

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()

    if process.returncode != 0 or not os.path.exists(output_filename):
        if "No space left on device" in stderr + " " + stdout:
            raise NoSpaceLeftException
        raise AudioProcessingException("failed converting to pcm data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout)

    return True
518 | ||
519 | ||
def stereofy_and_find_info(stereofy_executble_path, input_filename, output_filename):
    """
    converts a pcm wave file to two channel, 16 bit integer

    Runs the external "stereofy" tool, then parses the #duration, #channels,
    #samplerate and #bitdepth markers from its output. Returns a dict with
    those values plus the computed bitrate (kbit/s, from file size/duration).

    Raises AudioProcessingException on failure, NoSpaceLeftException when the
    disk fills up.
    """

    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    cmd = [stereofy_executble_path, "--input", input_filename, "--output", output_filename]

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()

    if process.returncode != 0 or not os.path.exists(output_filename):
        if "No space left on device" in stderr + " " + stdout:
            raise NoSpaceLeftException
        raise AudioProcessingException("failed calling stereofy data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout)

    # the markers may appear on either stream; flatten everything to one line
    stdout = (stdout + " " + stderr).replace("\n", " ")

    duration = 0
    m = re.match(r".*#duration (?P<duration>[\d\.]+).*", stdout)
    if m is not None:
        duration = float(m.group("duration"))

    channels = 0
    m = re.match(r".*#channels (?P<channels>\d+).*", stdout)
    if m is not None:
        # kept as float() for backward compatibility with existing callers
        channels = float(m.group("channels"))

    samplerate = 0
    m = re.match(r".*#samplerate (?P<samplerate>\d+).*", stdout)
    if m is not None:
        samplerate = float(m.group("samplerate"))

    bitdepth = None
    m = re.match(r".*#bitdepth (?P<bitdepth>\d+).*", stdout)
    if m is not None:
        bitdepth = float(m.group("bitdepth"))

    # kbit/s of the *input* file; 0 when the duration could not be parsed
    bitrate = (os.path.getsize(input_filename) * 8.0) / 1024.0 / duration if duration > 0 else 0

    return dict(duration=duration, channels=channels, samplerate=samplerate, bitrate=bitrate, bitdepth=bitdepth)
563 | ||
564 | ||
def convert_to_mp3(input_filename, output_filename, quality=70):
    """
    converts the incoming wave file to a mp3 file

    quality is the lame --abr target bitrate in kbit/s.
    Raises AudioProcessingException when the input is missing or lame fails.
    """

    if not os.path.exists(input_filename):
        # call-form raise works on Python 2 and 3 (the old comma form is 2-only)
        raise AudioProcessingException("file %s does not exist" % input_filename)

    command = ["lame", "--silent", "--abr", str(quality), input_filename, output_filename]

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()

    if process.returncode != 0 or not os.path.exists(output_filename):
        raise AudioProcessingException(stdout)
580 | ||
def convert_to_ogg(input_filename, output_filename, quality=1):
    """
    converts the incoming wave file to an ogg file

    quality is oggenc's -q setting.
    Raises AudioProcessingException when the input is missing or oggenc fails.
    """

    if not os.path.exists(input_filename):
        # call-form raise works on Python 2 and 3 (the old comma form is 2-only)
        raise AudioProcessingException("file %s does not exist" % input_filename)

    command = ["oggenc", "-q", str(quality), input_filename, "-o", output_filename]

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()

    if process.returncode != 0 or not os.path.exists(output_filename):
        raise AudioProcessingException(stdout)
596 | ||
def convert_using_ffmpeg(input_filename, output_filename):
    """
    converts the incoming audio file to mono 16-bit 44.1kHz pcm using ffmpeg

    (the command forces -ac 1, i.e. mono — the old docstring said "stereo")
    Aborts with AudioProcessingException if ffmpeg runs longer than 3 minutes.
    """
    TIMEOUT = 3 * 60

    def alarm_handler(signum, frame):
        # fires when the SIGALRM armed below expires before ffmpeg finishes
        raise AudioProcessingException("timeout while waiting for ffmpeg")

    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    command = ["ffmpeg", "-y", "-i", input_filename, "-ac","1","-acodec", "pcm_s16le", "-ar", "44100", output_filename]

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # arm a watchdog so communicate() cannot block forever
    signal.signal(signal.SIGALRM,alarm_handler)
    signal.alarm(TIMEOUT)
    (stdout, stderr) = process.communicate()
    signal.alarm(0)
    if process.returncode != 0 or not os.path.exists(output_filename):
        raise AudioProcessingException(stdout)