[mediagoblin.git] / mediagoblin / media_types / audio / processing.py

# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import argparse
import logging
from tempfile import NamedTemporaryFile
import os

from mediagoblin import mg_globals as mgg
from mediagoblin.processing import (
    create_pub_filepath, BadMediaFail, FilenameBuilder,
    ProgressCallback, MediaProcessor, ProcessingManager,
    request_from_args, get_orig_filename,
    store_public, copy_original)

from mediagoblin.media_types.audio.transcoders import (
    AudioTranscoder,AudioThumbnailer)

_log = logging.getLogger(__name__)

MEDIA_TYPE = 'mediagoblin.media_types.audio'


def sniff_handler(media_file, **kw):
    """Decide whether *media_file* should be handled as audio.

    Runs ``AudioTranscoder.discover`` on ``media_file.name``.  Returns
    MEDIA_TYPE when the discovery result reports audio and no video;
    returns None otherwise, including when discovery raises BadMediaFail.
    Extra keyword arguments are accepted and ignored.
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))

    try:
        discovered = AudioTranscoder().discover(media_file.name)
    except BadMediaFail:
        _log.debug('Audio discovery raised BadMediaFail')
        return None

    # Explicit comparisons are kept on purpose: a value such as None must
    # not count as "no video".
    audio_only = discovered.is_audio == True and discovered.is_video == False
    return MEDIA_TYPE if audio_only else None


def _save_to_public_store(local_file, public_filepath):
    """Write everything readable from *local_file* into the public store.

    The destination is obtained from ``mgg.public_store.get_file`` and used
    as a context manager so the store file is closed as soon as the copy is
    finished instead of being left open.
    """
    with mgg.public_store.get_file(public_filepath, 'wb') as public_file:
        public_file.write(local_file.read())


def process_audio(proc_state):
    """Code to process uploaded audio. Will be run by celery.

    Steps, in order:

    1. Localize the queued file into the workbench.
    2. If ``keep_original`` is configured, copy the original to the public
       store (``media_files['original']``).
    3. Transcode to WebM and store it (``media_files['webm_audio']``).
    4. If ``create_spectrogram`` is configured, render a spectrogram and a
       thumbnail of it (``media_files['spectrogram']`` and
       ``media_files['thumb']``); otherwise a placeholder thumb path is set.
    5. Delete the queued file and its directory and clear
       ``entry.queued_media_file``.

    proc_state must provide ``entry`` and ``workbench`` attributes.

    A Workbench() represents a local temporary dir. It is automatically
    cleaned up when this function exits.
    """
    entry = proc_state.entry
    workbench = proc_state.workbench
    audio_config = mgg.global_config['media_type:mediagoblin.media_types.audio']

    queued_filepath = entry.queued_media_file
    queued_filename = workbench.localized_file(
        mgg.queue_store, queued_filepath,
        'source')
    name_builder = FilenameBuilder(queued_filename)

    # All derived public names are based on the queued file's name without
    # its extension (queued_filepath is a path list; [-1] is the file name).
    original_stem = os.path.splitext(queued_filepath[-1])[0]

    webm_audio_filepath = create_pub_filepath(
        entry, '{original}.webm'.format(original=original_stem))

    if audio_config['keep_original']:
        with open(queued_filename, 'rb') as queued_file:
            original_filepath = create_pub_filepath(
                entry, name_builder.fill('{basename}{ext}'))

            _log.debug('Saving original...')
            _save_to_public_store(queued_file, original_filepath)

            entry.media_files['original'] = original_filepath

    transcoder = AudioTranscoder()

    with NamedTemporaryFile(dir=workbench.dir) as webm_audio_tmp:
        progress_callback = ProgressCallback(entry)

        # The transcoder writes to the temp file by name; the still-open
        # handle is then read from its start when saving below.
        transcoder.transcode(
            queued_filename,
            webm_audio_tmp.name,
            quality=audio_config['quality'],
            progress_callback=progress_callback)

        transcoder.discover(webm_audio_tmp.name)

        _log.debug('Saving medium...')
        _save_to_public_store(webm_audio_tmp, webm_audio_filepath)

        entry.media_files['webm_audio'] = webm_audio_filepath

        # entry.media_data_init(length=int(data.audiolength))

    if audio_config['create_spectrogram']:
        spectrogram_filepath = create_pub_filepath(
            entry,
            '{original}-spectrogram.jpg'.format(original=original_stem))

        # The spectrogram is rendered from an intermediate OGG transcode.
        with NamedTemporaryFile(dir=workbench.dir, suffix='.ogg') as ogg_tmp:
            _log.info('Creating OGG source for spectrogram')
            transcoder.transcode(
                queued_filename,
                ogg_tmp.name,
                mux_string='vorbisenc quality={0} ! oggmux'.format(
                    audio_config['quality']))

            thumbnailer = AudioThumbnailer()

            with NamedTemporaryFile(dir=workbench.dir,
                                    suffix='.jpg') as spectrogram_tmp:
                thumbnailer.spectrogram(
                    ogg_tmp.name,
                    spectrogram_tmp.name,
                    width=mgg.global_config['media:medium']['max_width'],
                    fft_size=audio_config['spectrogram_fft_size'])

                _log.debug('Saving spectrogram...')
                _save_to_public_store(spectrogram_tmp, spectrogram_filepath)

                entry.media_files['spectrogram'] = spectrogram_filepath

                # The thumbnail is a scaled-down copy of the spectrogram, so
                # it is produced while the spectrogram temp file still exists.
                with NamedTemporaryFile(dir=workbench.dir,
                                        suffix='.jpg') as thumb_tmp:
                    thumbnailer.thumbnail_spectrogram(
                        spectrogram_tmp.name,
                        thumb_tmp.name,
                        (mgg.global_config['media:thumb']['max_width'],
                         mgg.global_config['media:thumb']['max_height']))

                    thumb_filepath = create_pub_filepath(
                        entry,
                        '{original}-thumbnail.jpg'.format(
                            original=original_stem))

                    _save_to_public_store(thumb_tmp, thumb_filepath)

                    entry.media_files['thumb'] = thumb_filepath
    else:
        entry.media_files['thumb'] = ['fake', 'thumb', 'path.jpg']

    # Remove queued media file from storage and database.
    # queued_filepath is in the task_id directory which should
    # be removed too, but fail if the directory is not empty to be on
    # the super-safe side.
    mgg.queue_store.delete_file(queued_filepath)      # rm file
    mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
    entry.queued_media_file = []


class CommonAudioProcessor(MediaProcessor):
    """
    Provides a base for various audio processing steps

    common_setup() must be called first: it creates the attributes
    (audio_config, orig_filename, name_builder, spectrogram_tmp, transcoder,
    thumbnailer) that every other method of this class reads.
    """

    def common_setup(self):
        """
        Load the audio configuration, fetch the original file and create
        the helper objects shared by the processing steps.
        """
        self.audio_config = mgg \
            .global_config['media_type:mediagoblin.media_types.audio']

        # Pull down and set up the original file
        self.orig_filename = get_orig_filename(
            self.entry, self.workbench)
        self.name_builder = FilenameBuilder(self.orig_filename)

        # create_spectrogram() writes this file; generate_thumb() reads it.
        self.spectrogram_tmp = os.path.join(self.workbench.dir,
                                            self.name_builder.fill(
                                                '{basename}-spectrogram.jpg'))

        self.transcoder = AudioTranscoder()
        self.thumbnailer = AudioThumbnailer()

    def _pub_filepath(self, name_template):
        """
        Return the public filepath for a file derived from the original.

        name_template must contain an ``{original}`` placeholder, which is
        replaced by the original's file name without directory or extension.
        """
        # assumes get_orig_filename() returns a local path string (it is
        # handed to the transcoder as its input file) -- TODO confirm.
        # Indexing such a string with [-1] would yield only its last
        # character, so the file name is taken with os.path.basename.
        original = os.path.splitext(
            os.path.basename(self.orig_filename))[0]
        return create_pub_filepath(
            self.entry, name_template.format(original=original))

    def copy_original(self):
        """
        Copy the original file to public storage when the ``keep_original``
        option of the audio configuration is set; otherwise do nothing.
        """
        if self.audio_config['keep_original']:
            copy_original(
                self.entry, self.orig_filename,
                self.name_builder.fill('{basename}{ext}'))

    def transcode(self, quality=None):
        """
        Transcode the original to WebM and store the result publicly.

        quality is passed to the transcoder; a falsy value selects the
        ``quality`` option of the audio configuration.
        """
        if not quality:
            quality = self.audio_config['quality']

        progress_callback = ProgressCallback(self.entry)
        # NOTE(review): this temp name reuses the original's basename and
        # extension; if orig_filename itself lives in workbench.dir the
        # output path equals the input path -- confirm against
        # get_orig_filename().
        webm_audio_tmp = os.path.join(self.workbench.dir,
                                      self.name_builder.fill(
                                          '{basename}{ext}'))

        webm_audio_filepath = self._pub_filepath('{original}.webm')

        self.transcoder.transcode(
            self.orig_filename,
            webm_audio_tmp,
            quality=quality,
            progress_callback=progress_callback)

        self.transcoder.discover(webm_audio_tmp)

        _log.debug('Saving medium...')
        # Stored as 'webm_audio', the key process_audio() uses for the same
        # file, rather than 'medium'.
        # NOTE(review): store_public() receives a create_pub_filepath()
        # result as its last argument here -- confirm it expects a filepath
        # rather than a bare file name.
        store_public(self.entry, 'webm_audio', webm_audio_tmp,
                     webm_audio_filepath)

    def create_spectrogram(self, quality=None, max_width=None, fft_size=None):
        """
        Render a spectrogram image of the original and store it publicly.

        Falsy arguments fall back to configuration: quality and fft_size to
        the audio configuration (``quality``, ``spectrogram_fft_size``),
        max_width to ``media:medium`` ``max_width``.  The image is left at
        self.spectrogram_tmp for generate_thumb().
        """
        if not quality:
            quality = self.audio_config['quality']
        if not max_width:
            max_width = mgg.global_config['media:medium']['max_width']
        if not fft_size:
            fft_size = self.audio_config['spectrogram_fft_size']

        spectrogram_filepath = self._pub_filepath(
            '{original}-spectrogram.jpg')

        # The spectrogram is rendered from an intermediate OGG transcode.
        ogg_tmp = os.path.join(self.workbench.dir, self.name_builder.fill(
            '{basename}.ogg'))

        _log.info('Creating OGG source for spectrogram')
        self.transcoder.transcode(
            self.orig_filename,
            ogg_tmp,
            mux_string='vorbisenc quality={0} ! oggmux'.format(quality))

        self.thumbnailer.spectrogram(
            ogg_tmp,
            self.spectrogram_tmp,
            width=max_width,
            fft_size=fft_size)

        _log.debug('Saving spectrogram...')
        store_public(self.entry, 'spectrogram', self.spectrogram_tmp,
                     spectrogram_filepath)

    def generate_thumb(self, size=None):
        """
        Create a thumbnail of the spectrogram and store it publicly.

        size is a (max_width, max_height) pair; a falsy value selects the
        ``media:thumb`` settings of the global configuration.  Reads the
        image at self.spectrogram_tmp, so create_spectrogram() has to run
        first.
        """
        if not size:
            # The section is 'media:thumb', as in process_audio();
            # 'medium:thumb' is not a key used anywhere else in this module.
            max_width = mgg.global_config['media:thumb']['max_width']
            max_height = mgg.global_config['media:thumb']['max_height']
            size = (max_width, max_height)

        thumb_tmp = os.path.join(self.workbench.dir, self.name_builder.fill(
            '{basename}-thumbnail.jpg'))

        self.thumbnailer.thumbnail_spectrogram(
            self.spectrogram_tmp,
            thumb_tmp,
            size)

        thumb_filepath = self._pub_filepath('{original}-thumbnail.jpg')

        store_public(self.entry, 'thumb', thumb_tmp, thumb_filepath)


class InitialProcessor(CommonAudioProcessor):
    """
    Initial processing steps for new audio
    """
    name = "initial"
    description = "Initial processing"

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """
        Determine if this media type is eligible for processing

        An explicit state wins; when it is falsy, entry.state is used.
        Eligible states are "unprocessed" and "failed".
        """
        if not state:
            state = entry.state
        return state in (
            "unprocessed", "failed")

    @classmethod
    def generate_parser(cls):
        """
        Build the argparse parser describing this processor's options.
        """
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--quality',
            help='vorbisenc quality')

        parser.add_argument(
            '--fft_size',
            type=int,
            help='spectrogram fft size')

        # nargs=2 is required to go with the two-element metavar tuple;
        # argparse rejects a metavar tuple whose length differs from nargs.
        parser.add_argument(
            '--thumb_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        parser.add_argument(
            '--medium_width',
            type=int,
            help='The width of the spectogram')

        parser.add_argument(
            '--create_spectrogram',
            action='store_true',
            help='Create spectogram and thumbnail')

        return parser

    @classmethod
    def args_to_request(cls, args):
        """
        Convert parsed arguments into a request via request_from_args,
        forwarding exactly the options that process() accepts.
        """
        return request_from_args(
            args, ['create_spectrogram', 'quality', 'fft_size',
                   'thumb_size', 'medium_width'])

    def process(self, quality=None, fft_size=None, thumb_size=None,
                create_spectrogram=None, medium_width=None):
        """
        Run the initial processing: transcode, optionally keep the
        original, optionally create spectrogram and thumbnail, then remove
        the queued file.

        Every argument left falsy falls back to configuration inside the
        step that consumes it; create_spectrogram falls back to the
        ``create_spectrogram`` option of the audio configuration.
        """
        # common_setup() is what creates self.audio_config, so it has to
        # run before the configuration fallback below reads it.
        self.common_setup()

        if not create_spectrogram:
            create_spectrogram = self.audio_config['create_spectrogram']

        self.transcode(quality=quality)
        self.copy_original()

        if create_spectrogram:
            self.create_spectrogram(quality=quality, max_width=medium_width,
                                    fft_size=fft_size)
            self.generate_thumb(size=thumb_size)
        # delete_queue_file is not defined in this file; presumably
        # inherited from MediaProcessor.
        self.delete_queue_file()


class AudioProcessingManager(ProcessingManager):
    """
    Processing manager for audio media; registers InitialProcessor.
    """
    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) resolves to
        # the subclass when this class is inherited from, which makes
        # __init__ call itself without end.
        super(AudioProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)
Commit	Line	Data
5a34a80d JW	1	# GNU MediaGoblin -- federated, autonomous media hosting
	2	# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU Affero General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU Affero General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU Affero General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
5ac1fe80	17	import argparse
5a34a80d	18	import logging
25e39842	19	from tempfile import NamedTemporaryFile
5a34a80d JW	20	import os
	21
	22	from mediagoblin import mg_globals as mgg
5ac1fe80 RE	23	from mediagoblin.processing import (
	24	create_pub_filepath, BadMediaFail, FilenameBuilder,
	25	ProgressCallback, MediaProcessor, ProcessingManager,
	26	request_from_args, get_orig_filename,
	27	store_public, copy_original)
5a34a80d	28
5ac1fe80 RE	29	from mediagoblin.media_types.audio.transcoders import (
5ac1fe80 RE	30	AudioTranscoder,AudioThumbnailer)
5a34a80d	31
10085b77	32	_log = logging.getLogger(__name__)
5a34a80d	33
df68438a RE	34	MEDIA_TYPE = 'mediagoblin.media_types.audio'
df68438a RE	35
64712915	36
ec4261a4	37	def sniff_handler(media_file, **kw):
df68438a	38	_log.info('Sniffing {0}'.format(MEDIA_TYPE))
196a5181	39	try:
4f4f2531	40	transcoder = AudioTranscoder()
ec4261a4	41	data = transcoder.discover(media_file.name)
4f4f2531 JW	42	except BadMediaFail:
4f4f2531 JW	43	_log.debug('Audio discovery raised BadMediaFail')
df68438a	44	return None
ec4261a4	45
4f4f2531	46	if data.is_audio == True and data.is_video == False:
df68438a	47	return MEDIA_TYPE
10085b77	48
df68438a	49	return None
5a34a80d	50
64712915	51
fb46fa66	52	def process_audio(proc_state):
45ab3e07	53	"""Code to process uploaded audio. Will be run by celery.
5a34a80d	54
45ab3e07 SS	55	A Workbench() represents a local tempory dir. It is automatically
	56	cleaned up when this function exits.
	57	"""
fb46fa66 E	58	entry = proc_state.entry
fb46fa66 E	59	workbench = proc_state.workbench
45ab3e07	60	audio_config = mgg.global_config['media_type:mediagoblin.media_types.audio']
5a34a80d JW	61
	62	queued_filepath = entry.queued_media_file
	63	queued_filename = workbench.localized_file(
	64	mgg.queue_store, queued_filepath,
	65	'source')
b781c3c9	66	name_builder = FilenameBuilder(queued_filename)
5a34a80d	67
b781c3c9	68	webm_audio_filepath = create_pub_filepath(
5a34a80d JW	69	entry,
	70	'{original}.webm'.format(
	71	original=os.path.splitext(
	72	queued_filepath[-1])[0]))
	73
b781c3c9 JK	74	if audio_config['keep_original']:
	75	with open(queued_filename, 'rb') as queued_file:
	76	original_filepath = create_pub_filepath(
	77	entry, name_builder.fill('{basename}{ext}'))
	78
	79	with mgg.public_store.get_file(original_filepath, 'wb') as \
	80	original_file:
	81	_log.debug('Saving original...')
	82	original_file.write(queued_file.read())
	83
	84	entry.media_files['original'] = original_filepath
	85
10085b77	86	transcoder = AudioTranscoder()
5a34a80d	87
25e39842	88	with NamedTemporaryFile(dir=workbench.dir) as webm_audio_tmp:
64712915	89	progress_callback = ProgressCallback(entry)
5a34a80d JW	90
	91	transcoder.transcode(
	92	queued_filename,
b781c3c9	93	webm_audio_tmp.name,
64712915 JW	94	quality=audio_config['quality'],
64712915 JW	95	progress_callback=progress_callback)
5a34a80d	96
a855e92a	97	transcoder.discover(webm_audio_tmp.name)
5a34a80d JW	98
5a34a80d JW	99	_log.debug('Saving medium...')
b781c3c9 JK	100	mgg.public_store.get_file(webm_audio_filepath, 'wb').write(
b781c3c9 JK	101	webm_audio_tmp.read())
5a34a80d	102
b781c3c9	103	entry.media_files['webm_audio'] = webm_audio_filepath
5a34a80d	104
c7cf6235	105	# entry.media_data_init(length=int(data.audiolength))
5a34a80d	106
10085b77 JW	107	if audio_config['create_spectrogram']:
	108	spectrogram_filepath = create_pub_filepath(
	109	entry,
	110	'{original}-spectrogram.jpg'.format(
	111	original=os.path.splitext(
	112	queued_filepath[-1])[0]))
	113
25e39842	114	with NamedTemporaryFile(dir=workbench.dir, suffix='.ogg') as wav_tmp:
549000d9	115	_log.info('Creating OGG source for spectrogram')
10085b77 JW	116	transcoder.transcode(
	117	queued_filename,
	118	wav_tmp.name,
549000d9 JW	119	mux_string='vorbisenc quality={0} ! oggmux'.format(
549000d9 JW	120	audio_config['quality']))
10085b77 JW	121
	122	thumbnailer = AudioThumbnailer()
	123
25e39842	124	with NamedTemporaryFile(dir=workbench.dir, suffix='.jpg') as spectrogram_tmp:
10085b77 JW	125	thumbnailer.spectrogram(
	126	wav_tmp.name,
	127	spectrogram_tmp.name,
196a5181 JW	128	width=mgg.global_config['media:medium']['max_width'],
196a5181 JW	129	fft_size=audio_config['spectrogram_fft_size'])
10085b77 JW	130
	131	_log.debug('Saving spectrogram...')
	132	mgg.public_store.get_file(spectrogram_filepath, 'wb').write(
	133	spectrogram_tmp.read())
	134
	135	entry.media_files['spectrogram'] = spectrogram_filepath
	136
25e39842	137	with NamedTemporaryFile(dir=workbench.dir, suffix='.jpg') as thumb_tmp:
10085b77 JW	138	thumbnailer.thumbnail_spectrogram(
	139	spectrogram_tmp.name,
	140	thumb_tmp.name,
	141	(mgg.global_config['media:thumb']['max_width'],
	142	mgg.global_config['media:thumb']['max_height']))
	143
	144	thumb_filepath = create_pub_filepath(
	145	entry,
	146	'{original}-thumbnail.jpg'.format(
	147	original=os.path.splitext(
	148	queued_filepath[-1])[0]))
	149
	150	mgg.public_store.get_file(thumb_filepath, 'wb').write(
	151	thumb_tmp.read())
	152
	153	entry.media_files['thumb'] = thumb_filepath
	154	else:
5a34a80d	155	entry.media_files['thumb'] = ['fake', 'thumb', 'path.jpg']
196a5181	156
36ae6bcb SS	157	# Remove queued media file from storage and database.
	158	# queued_filepath is in the task_id directory which should
	159	# be removed too, but fail if the directory is not empty to be on
	160	# the super-safe side.
	161	mgg.queue_store.delete_file(queued_filepath) # rm file
	162	mgg.queue_store.delete_dir(queued_filepath[:-1]) # rm dir
	163	entry.queued_media_file = []
5ac1fe80 RE	164
	165
	166	class CommonAudioProcessor(MediaProcessor):
	167	"""
	168	Provides a base for various audio processing steps
	169	"""
	170
	171	def common_setup(self):
	172	"""
	173	"""
	174	self.audio_config = mgg \
	175	.global_config['media_type:mediagoblin.media_types.audio']
	176
	177	# Pull down and set up the original file
	178	self.orig_filename = get_orig_filename(
	179	self.entry, self.workbench)
	180	self.name_builder = FilenameBuilder(self.orig_filename)
	181
	182	self.spectrogram_tmp = os.path.join(self.workbench.dir,
	183	self.name_builder.fill(
	184	'{basename}-spectrogram.jpg'))
	185
	186	self.transcoder = AudioTranscoder()
	187	self.thumbnailer = AudioThumbnailer()
	188
	189	def copy_original(self):
	190	if self.audio_config['keep_original']:
	191	copy_original(
	192	self.entry, self.orig_filename,
	193	self.name_builder.fill('{basename}{ext}'))
	194
	195	def transcode(self, quality=None):
	196	if not quality:
	197	quality = self.audio_config['quality']
	198
	199	progress_callback = ProgressCallback(self.entry)
	200	webm_audio_tmp = os.path.join(self.workbench.dir,
	201	self.name_builder.fill(
	202	'{basename}{ext}'))
	203
	204	webm_audio_filepath = create_pub_filepath(
	205	self.entry,
	206	'{original}.webm'.format(
	207	original=os.path.splitext(
	208	self.orig_filename[-1])[0]))
	209
	210	self.transcoder.transcode(
	211	self.orig_filename,
	212	webm_audio_tmp,
	213	quality=quality,
	214	progress_callback=progress_callback)
	215
	216	self.transcoder.discover(webm_audio_tmp)
	217
	218	_log.debug('Saving medium...')
	219	store_public(self.entry, 'medium', webm_audio_tmp,
	220	webm_audio_filepath)
	221
	222	def create_spectrogram(self, quality=None, max_width=None, fft_size=None):
	223	if not quality:
	224	quality = self.audio_config['quality']
	225	if not max_width:
	226	max_width = mgg.global_config['media:medium']['max_width']
	227	if not fft_size:
228	fft_size = self.audio_config['spectrogram_fft_size']
229
230	spectrogram_filepath = create_pub_filepath(
231	self.entry,
232	'{original}-spectrogram.jpg'.format(
233	original=os.path.splitext(
234	self.orig_filename[-1])[0]))
235
236	wav_tmp = os.path.join(self.workbench.dir, self.name_builder.fill(
237	'{basename}.ogg'))
238
239	_log.info('Creating OGG source for spectrogram')
240	self.transcoder.transcode(
241	self.orig_filename,
242	wav_tmp,
243	mux_string='vorbisenc quality={0} ! oggmux'.format(quality))
244
245	self.thumbnailer.spectrogram(
246	wav_tmp,
247	self.spectrogram_tmp,
248	width=max_width,
249	fft_size=fft_size)
250
251	_log.debug('Saving spectrogram...')
252	store_public(self.entry, 'spectrogram', self.spectrogram_tmp,
253	spectrogram_filepath)
254
255	def generate_thumb(self, size=None):
256	if not size:
257	max_width = mgg.global_config['medium:thumb']['max_width']
258	max_height = mgg.global_config['medium:thumb']['max_height']
259	size = (max_width, max_height)
260
261	thumb_tmp = os.path.join(self.workbench.dir, self.name_builder.fill(
262	'{basename}-thumbnail.jpg'))
263
264	self.thumbnailer.thumbnail_spectrogram(
265	self.spectrogram_tmp,
266	thumb_tmp,
267	size)
268
269	thumb_filepath = create_pub_filepath(
270	self.entry,
271	'{original}-thumbnail.jpg'.format(
272	original=os.path.splitext(
273	self.orig_filename[-1])[0]))
274
275	store_public(self.entry, 'thumb', thumb_tmp, thumb_filepath)
276
277
278	class InitialProcessor(CommonAudioProcessor):
279	"""
280	Initial processing steps for new audio
281	"""
282	name = "initial"
283	description = "Initial processing"
284
285	@classmethod
286	def media_is_eligible(cls, entry=None, state=None):
287	"""
288	Determine if this media type is eligible for processing
289	"""
290	if not state:
291	state = entry.state
292	return state in (
293	"unprocessed", "failed")
294
295	@classmethod
296	def generate_parser(cls):
297	parser = argparse.ArgumentParser(
298	description=cls.description,
299	prog=cls.name)
300
301	parser.add_argument(
302	'--quality',
303	help='vorbisenc quality')
304
305	parser.add_argument(
306	'--fft_size',
307	type=int,
308	help='spectrogram fft size')
309
310	parser.add_argument(
311	'--thumb_size',
312	metavar=('max_width', 'max_height'),
313	type=int)
314
315	parser.add_argument(
316	'--medium_width',
317	type=int,
318	help='The width of the spectogram')
319
320	parser.add_argument(
321	'--create_spectrogram',
322	action='store_true',
323	help='Create spectogram and thumbnail')
324
325	return parser
326
327	@classmethod
328	def args_to_request(cls, args):
329	return request_from_args(
330	args, ['create_spectrogram', 'quality', 'fft_size',
331	'thumb_size', 'medium_width'])
332
333	def process(self, quality=None, fft_size=None, thumb_size=None,
334	create_spectrogram=None, medium_width=None):
335	if not create_spectrogram:
336	create_spectrogram = self.audio_config['create_spectrogram']
337
338	self.common_setup()
339	self.transcode(quality=quality)
340	self.copy_original()
341
342	if create_spectrogram:
343	self.create_spectrogram(quality=quality, max_width=medium_width,
344	fft_size=fft_size)
345	self.generate_thumb(size=thumb_size)
346	self.delete_queue_file()
347
348
349	class AudioProcessingManager(ProcessingManager):
350	def __init__(self):
351	super(self.__class__, self).__init__()
352	self.add_processor(InitialProcessor)