X-Git-Url: https://vcs.fsf.org/?a=blobdiff_plain;f=mediagoblin%2Fmedia_types%2Fascii%2Fprocessing.py;h=ef4727de243d242e64fca703d8d80d44cb62f584;hb=529eb17b38ac08c956164e8b829ea556db1f32f4;hp=aca784e8c6c375ba30391122eb91a1bf1d41a33d;hpb=bc23c8b9b6aecc5d0caddc8b49efb0d01212bba1;p=mediagoblin.git diff --git a/mediagoblin/media_types/ascii/processing.py b/mediagoblin/media_types/ascii/processing.py index aca784e8..ef4727de 100644 --- a/mediagoblin/media_types/ascii/processing.py +++ b/mediagoblin/media_types/ascii/processing.py @@ -13,6 +13,7 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import argparse import chardet import os try: @@ -22,7 +23,11 @@ except ImportError: import logging from mediagoblin import mg_globals as mgg -from mediagoblin.processing import create_pub_filepath +from mediagoblin.processing import ( + create_pub_filepath, FilenameBuilder, + MediaProcessor, ProcessingManager, + get_process_filename, copy_original, + store_public, request_from_args) from mediagoblin.media_types.ascii import asciitoimage _log = logging.getLogger(__name__) @@ -43,106 +48,228 @@ def sniff_handler(media_file, **kw): return None -def process_ascii(proc_state): - """Code to process a txt file. Will be run by celery. - - A Workbench() represents a local tempory dir. It is automatically - cleaned up when this function exits. +class CommonAsciiProcessor(MediaProcessor): + """ + Provides a base for various ascii processing steps """ - entry = proc_state.entry - workbench = proc_state.workbench - ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii'] - # Conversions subdirectory to avoid collisions - conversions_subdir = os.path.join( - workbench.dir, 'conversions') - os.mkdir(conversions_subdir) + acceptable_files = ['original', 'unicode'] + + def common_setup(self): + self.ascii_config = mgg.global_config['plugins'][ + 'mediagoblin.media_types.ascii'] + + # Conversions subdirectory to avoid collisions + self.conversions_subdir = os.path.join( + self.workbench.dir, 'conversions') + os.mkdir(self.conversions_subdir) - queued_filepath = entry.queued_media_file - queued_filename = workbench.localized_file( - mgg.queue_store, queued_filepath, - 'source') + # Pull down and set up the processing file + self.process_filename = get_process_filename( + self.entry, self.workbench, self.acceptable_files) + self.name_builder = FilenameBuilder(self.process_filename) - queued_file = file(queued_filename, 'rb') + self.charset = None - with queued_file: - queued_file_charset = chardet.detect(queued_file.read()) + def copy_original(self): + copy_original( + self.entry, self.process_filename, + self.name_builder.fill('{basename}{ext}')) + + def _detect_charset(self, orig_file): + d_charset = chardet.detect(orig_file.read()) # Only select a non-utf-8 charset if chardet is *really* sure - # Tested with "Feli\x0109an superjaron", which was detecte - if queued_file_charset['confidence'] < 0.9: - interpreted_charset = 'utf-8' + # Tested with "Feli\x0109an superjaron", which was detected + if d_charset['confidence'] < 0.9: + self.charset = 'utf-8' else: - interpreted_charset = queued_file_charset['encoding'] + self.charset = d_charset['encoding'] _log.info('Charset detected: {0}\nWill interpret as: {1}'.format( - queued_file_charset, - interpreted_charset)) + d_charset, + self.charset)) + + # Rewind the file + orig_file.seek(0) + + def store_unicode_file(self): + with file(self.process_filename, 'rb') as orig_file: + self._detect_charset(orig_file) + unicode_filepath = create_pub_filepath(self.entry, + 'ascii-portable.txt') - queued_file.seek(0) # Rewind the queued file + with mgg.public_store.get_file(unicode_filepath, 'wb') \ + as unicode_file: + # Decode the original file from its detected charset (or UTF8) + # Encode the unicode instance to ASCII and replace any + # non-ASCII with an HTML entity (&# + unicode_file.write( + unicode(orig_file.read().decode( + self.charset)).encode( + 'ascii', + 'xmlcharrefreplace')) - thumb_filepath = create_pub_filepath( - entry, 'thumbnail.png') + self.entry.media_files['unicode'] = unicode_filepath - tmp_thumb_filename = os.path.join( - conversions_subdir, thumb_filepath[-1]) + def generate_thumb(self, font=None, thumb_size=None): + with file(self.process_filename, 'rb') as orig_file: + # If no font kwarg, check config + if not font: + font = self.ascii_config.get('thumbnail_font', None) + if not thumb_size: + thumb_size = (mgg.global_config['media:thumb']['max_width'], + mgg.global_config['media:thumb']['max_height']) - ascii_converter_args = {} + if self._skip_resizing(font, thumb_size): + return - if ascii_config['thumbnail_font']: - ascii_converter_args.update( - {'font': ascii_config['thumbnail_font']}) + tmp_thumb = os.path.join( + self.conversions_subdir, + self.name_builder.fill('{basename}.thumbnail.png')) - converter = asciitoimage.AsciiToImage( - **ascii_converter_args) + ascii_converter_args = {} - thumb = converter._create_image( - queued_file.read()) + # If there is a font from either the config or kwarg, update + # ascii_converter_args + if font: + ascii_converter_args.update( + {'font': self.ascii_config['thumbnail_font']}) - with file(tmp_thumb_filename, 'w') as thumb_file: - thumb.thumbnail( - (mgg.global_config['media:thumb']['max_width'], - mgg.global_config['media:thumb']['max_height']), - Image.ANTIALIAS) - thumb.save(thumb_file) + converter = asciitoimage.AsciiToImage( + **ascii_converter_args) - _log.debug('Copying local file to public storage') - mgg.public_store.copy_local_to_storage( - tmp_thumb_filename, thumb_filepath) + thumb = converter._create_image( + orig_file.read()) - queued_file.seek(0) + with file(tmp_thumb, 'w') as thumb_file: + thumb.thumbnail( + thumb_size, + Image.ANTIALIAS) + thumb.save(thumb_file) - original_filepath = create_pub_filepath(entry, queued_filepath[-1]) + thumb_info = {'font': font, + 'width': thumb_size[0], + 'height': thumb_size[1]} - with mgg.public_store.get_file(original_filepath, 'wb') \ - as original_file: - original_file.write(queued_file.read()) + self.entry.set_file_metadata('thumb', **thumb_info) - queued_file.seek(0) # Rewind *again* + _log.debug('Copying local file to public storage') + store_public(self.entry, 'thumb', tmp_thumb, + self.name_builder.fill('{basename}.thumbnail.jpg')) - unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt') + def _skip_resizing(self, font, thumb_size): + thumb_info = self.entry.get_file_metadata('thumb') - with mgg.public_store.get_file(unicode_filepath, 'wb') \ - as unicode_file: - # Decode the original file from its detected charset (or UTF8) - # Encode the unicode instance to ASCII and replace any non-ASCII - # with an HTML entity (&# - unicode_file.write( - unicode(queued_file.read().decode( - interpreted_charset)).encode( - 'ascii', - 'xmlcharrefreplace')) + if not thumb_info: + return False - # Remove queued media file from storage and database. - # queued_filepath is in the task_id directory which should - # be removed too, but fail if the directory is not empty to be on - # the super-safe side. - mgg.queue_store.delete_file(queued_filepath) # rm file - mgg.queue_store.delete_dir(queued_filepath[:-1]) # rm dir - entry.queued_media_file = [] + skip = True - media_files_dict = entry.setdefault('media_files', {}) - media_files_dict['thumb'] = thumb_filepath - media_files_dict['unicode'] = unicode_filepath - media_files_dict['original'] = original_filepath + if thumb_info.get('font') != font: + skip = False + elif thumb_info.get('width') != thumb_size[0]: + skip = False + elif thumb_info.get('height') != thumb_size[1]: + skip = False - entry.save() + return skip + + +class InitialProcessor(CommonAsciiProcessor): + """ + Initial processing step for new ascii media + """ + name = "initial" + description = "Initial processing" + + @classmethod + def media_is_eligible(cls, entry=None, state=None): + if not state: + state = entry.state + return state in ( + "unprocessed", "failed") + + @classmethod + def generate_parser(cls): + parser = argparse.ArgumentParser( + description=cls.description, + prog=cls.name) + + parser.add_argument( + '--thumb_size', + nargs=2, + metavar=('max_width', 'max_width'), + type=int) + + parser.add_argument( + '--font', + help='the thumbnail font') + + return parser + + @classmethod + def args_to_request(cls, args): + return request_from_args( + args, ['thumb_size', 'font']) + + def process(self, thumb_size=None, font=None): + self.common_setup() + self.store_unicode_file() + self.generate_thumb(thumb_size=thumb_size, font=font) + self.copy_original() + self.delete_queue_file() + + +class Resizer(CommonAsciiProcessor): + """ + Resizing process steps for processed media + """ + name = 'resize' + description = 'Resize thumbnail' + thumb_size = 'thumb_size' + + @classmethod + def media_is_eligible(cls, entry=None, state=None): + """ + Determine if this media type is eligible for processing + """ + if not state: + state = entry.state + return state in 'processed' + + @classmethod + def generate_parser(cls): + parser = argparse.ArgumentParser( + description=cls.description, + prog=cls.name) + + parser.add_argument( + '--thumb_size', + nargs=2, + metavar=('max_width', 'max_height'), + type=int) + + # Needed for gmg reprocess thumbs to work + parser.add_argument( + 'file', + nargs='?', + default='thumb', + choices=['thumb']) + + return parser + + @classmethod + def args_to_request(cls, args): + return request_from_args( + args, ['thumb_size', 'file']) + + def process(self, thumb_size=None, file=None): + self.common_setup() + self.generate_thumb(thumb_size=thumb_size) + + +class AsciiProcessingManager(ProcessingManager): + def __init__(self): + super(self.__class__, self).__init__() + self.add_processor(InitialProcessor) + self.add_processor(Resizer)