From 35d6a95008ac63a00cc2e4d7fac8187bc58eea9a Mon Sep 17 00:00:00 2001 From: Rodney Ewing Date: Tue, 13 Aug 2013 17:32:59 -0700 Subject: [PATCH] Added initial processor for ascii media --- mediagoblin/media_types/ascii/processing.py | 237 ++++++++++++-------- 1 file changed, 142 insertions(+), 95 deletions(-) diff --git a/mediagoblin/media_types/ascii/processing.py b/mediagoblin/media_types/ascii/processing.py index aca784e8..4cf8081a 100644 --- a/mediagoblin/media_types/ascii/processing.py +++ b/mediagoblin/media_types/ascii/processing.py @@ -13,6 +13,7 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import argparse import chardet import os try: @@ -22,7 +23,11 @@ except ImportError: import logging from mediagoblin import mg_globals as mgg -from mediagoblin.processing import create_pub_filepath +from mediagoblin.processing import ( + create_pub_filepath, FilenameBuilder, + MediaProcessor, ProcessingManager, + get_orig_filename, copy_original, + store_public, request_from_args) from mediagoblin.media_types.ascii import asciitoimage _log = logging.getLogger(__name__) @@ -43,106 +48,148 @@ def sniff_handler(media_file, **kw): return None -def process_ascii(proc_state): - """Code to process a txt file. Will be run by celery. - - A Workbench() represents a local tempory dir. It is automatically - cleaned up when this function exits. 
class CommonAsciiProcessor(MediaProcessor):
    """
    Provides a base for various ascii processing steps.

    The methods here read ``self.entry`` and ``self.workbench``, which
    this class never assigns.
    NOTE(review): presumably both are provided by ``MediaProcessor`` --
    confirm against mediagoblin.processing.

    ``common_setup()`` must run before any other method, because it
    creates ``ascii_config``, ``conversions_subdir``, ``orig_filename``,
    ``name_builder`` and ``charset``.
    """

    def common_setup(self):
        """
        Prepare the state shared by every processing step.

        Loads the ascii media type configuration, creates the temporary
        conversions directory inside the workbench and fetches the
        original file.

        Raises ``OSError`` (from ``os.mkdir``) if the conversions
        directory already exists.
        """
        self.ascii_config = mgg.global_config[
            'media_type:mediagoblin.media_types.ascii']

        # Conversions subdirectory to avoid collisions
        self.conversions_subdir = os.path.join(
            self.workbench.dir, 'conversions')
        os.mkdir(self.conversions_subdir)

        # Pull down and set up the original file
        self.orig_filename = get_orig_filename(
            self.entry, self.workbench)
        self.name_builder = FilenameBuilder(self.orig_filename)

        # Filled in by _detect_charset()
        self.charset = None

    def copy_original(self):
        """
        Hand the original file to the module-level ``copy_original``
        helper, under the name ``{basename}{ext}``.
        """
        # Inside the method body the bare name resolves to the imported
        # module-level function, not to this method.
        copy_original(
            self.entry, self.orig_filename,
            self.name_builder.fill('{basename}{ext}'))

    def _detect_charset(self, orig_file):
        """
        Guess the encoding of ``orig_file`` and store it in
        ``self.charset``.

        ``orig_file`` must be a seekable file object opened in binary
        mode.  It is read from its current position to the end and then
        put back where it was, so the caller can read the same content
        again.
        """
        start = orig_file.tell()
        d_charset = chardet.detect(orig_file.read())
        # Detection consumed the stream; rewind so later reads do not
        # silently return an empty string.
        orig_file.seek(start)

        # Only select a non-utf-8 charset if chardet is *really* sure.
        # Also fall back to utf-8 when chardet reports no encoding at
        # all, since a falsy charset cannot be passed to decode().
        if d_charset['confidence'] < 0.9 or not d_charset['encoding']:
            self.charset = 'utf-8'
        else:
            self.charset = d_charset['encoding']

        _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
            d_charset,
            self.charset))

    def store_unicode_file(self):
        """
        Write an ASCII-only copy of the original to public storage as
        ``ascii-portable.txt`` and record it under
        ``entry.media_files['unicode']``.

        Raises ``UnicodeDecodeError`` if the content is not valid in the
        charset chosen by ``_detect_charset`` (decoding is strict).
        """
        with open(self.orig_filename, 'rb') as orig_file:
            self._detect_charset(orig_file)
            raw_text = orig_file.read()

        unicode_filepath = create_pub_filepath(self.entry,
                                               'ascii-portable.txt')

        with mgg.public_store.get_file(unicode_filepath, 'wb') \
                as unicode_file:
            # Decode the original file from its detected charset (or UTF8)
            # Encode the result to ASCII and replace any non-ASCII
            # character with a numeric character reference (&#NNNN;)
            unicode_file.write(
                raw_text.decode(self.charset).encode(
                    'ascii',
                    'xmlcharrefreplace'))

        self.entry.media_files['unicode'] = unicode_filepath

    def generate_thumb(self, font=None, thumb_size=None):
        """
        Render the original text to an image, shrink it and publish it
        as the entry's ``thumb`` file.

        font -- font handed to ``asciitoimage.AsciiToImage``.  When
            falsy, the ``thumbnail_font`` config value is used; when
            that is unset too, the converter's own default applies.
        thumb_size -- ``(max_width, max_height)``.  When falsy, the
            limits come from the ``media:thumb`` global config section.
        """
        # If no font kwarg, check config
        if not font:
            font = self.ascii_config.get('thumbnail_font', None)
        if not thumb_size:
            thumb_size = (mgg.global_config['media:thumb']['max_width'],
                          mgg.global_config['media:thumb']['max_height'])

        # One name for both the temporary and the public file, so the
        # extension always matches the PNG data that is written.
        thumb_name = self.name_builder.fill('{basename}.thumbnail.png')
        tmp_thumb = os.path.join(self.conversions_subdir, thumb_name)

        ascii_converter_args = {}

        # If there is a font from either the config or kwarg, pass that
        # font on to the converter
        if font:
            ascii_converter_args['font'] = font

        converter = asciitoimage.AsciiToImage(
            **ascii_converter_args)

        with open(self.orig_filename, 'rb') as orig_file:
            thumb = converter._create_image(
                orig_file.read())

        # Image data is binary, so the file is opened in 'wb' mode
        with open(tmp_thumb, 'wb') as thumb_file:
            thumb.thumbnail(
                thumb_size,
                Image.ANTIALIAS)
            thumb.save(thumb_file)

        _log.debug('Copying local file to public storage')
        store_public(self.entry, 'thumb', tmp_thumb, thumb_name)


class InitialProcessor(CommonAsciiProcessor):
    """
    Initial processing step for new ascii media
    """
    name = "initial"
    description = "Initial processing"

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """
        Return True when the state is "unprocessed" or "failed".

        ``state`` wins when given; otherwise ``entry.state`` is used, so
        at least one of the two arguments has to be supplied.
        """
        if not state:
            state = entry.state
        return state in (
            "unprocessed", "failed")

    @classmethod
    def generate_parser(cls):
        """
        Build the argparse parser for this processor, with the options
        ``--thumb_size`` (two ints) and ``--font``.
        """
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--thumb_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        parser.add_argument(
            '--font',
            help='the thumbnail font')

        return parser

    @classmethod
    def args_to_request(cls, args):
        """
        Convert parsed arguments into a request via
        ``request_from_args``, keeping ``thumb_size`` and ``font``.
        """
        return request_from_args(
            args, ['thumb_size', 'font'])

    def process(self, thumb_size=None, font=None):
        """
        Run the whole initial pipeline: setup, portable text copy,
        thumbnail, copy of the original, then removal of the queued
        file.

        ``thumb_size`` and ``font`` are passed through to
        ``generate_thumb``.
        """
        self.common_setup()
        self.store_unicode_file()
        self.generate_thumb(thumb_size=thumb_size, font=font)
        self.copy_original()
        # Not defined in this file.
        # NOTE(review): presumably inherited from MediaProcessor -- confirm.
        self.delete_queue_file()


class AsciiProcessingManager(ProcessingManager):
    """
    Processing manager for ascii media; registers ``InitialProcessor``.
    """

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) would
        # recurse forever as soon as this manager is subclassed.
        super(AsciiProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)