Added initial processor for ascii media
author Rodney Ewing <ewing.rj@gmail.com>
Wed, 14 Aug 2013 00:32:59 +0000 (17:32 -0700)
committer Rodney Ewing <ewing.rj@gmail.com>
Fri, 16 Aug 2013 22:30:20 +0000 (15:30 -0700)
mediagoblin/media_types/ascii/processing.py

index aca784e8c6c375ba30391122eb91a1bf1d41a33d..4cf8081a9fd96126f4120a06447cca26d39e08f5 100644 (file)
@@ -13,6 +13,7 @@
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+import argparse
 import chardet
 import os
 try:
@@ -22,7 +23,11 @@ except ImportError:
 import logging
 
 from mediagoblin import mg_globals as mgg
-from mediagoblin.processing import create_pub_filepath
+from mediagoblin.processing import (
+    create_pub_filepath, FilenameBuilder,
+    MediaProcessor, ProcessingManager,
+    get_orig_filename, copy_original,
+    store_public, request_from_args)
 from mediagoblin.media_types.ascii import asciitoimage
 
 _log = logging.getLogger(__name__)
@@ -43,106 +48,148 @@ def sniff_handler(media_file, **kw):
     return None
 
 
-def process_ascii(proc_state):
-    """Code to process a txt file. Will be run by celery.
-
-    A Workbench() represents a local tempory dir. It is automatically
-    cleaned up when this function exits.
+class CommonAsciiProcessor(MediaProcessor):
+    """
+    Provides a base for various ascii processing steps
     """
-    entry = proc_state.entry
-    workbench = proc_state.workbench
-    ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii']
-    # Conversions subdirectory to avoid collisions
-    conversions_subdir = os.path.join(
-        workbench.dir, 'conversions')
-    os.mkdir(conversions_subdir)
+    def common_setup(self):
+        """
+        Prepare the state shared by the ascii processing steps.
+
+        Reads the ascii media type section of the global config, creates
+        a 'conversions' subdirectory inside the workbench directory,
+        resolves the original file name through get_orig_filename, builds
+        a FilenameBuilder from it and resets self.charset to None.
+        """
+        self.ascii_config = mgg.global_config[
+            'media_type:mediagoblin.media_types.ascii']
+
+        # Conversions subdirectory to avoid collisions
+        self.conversions_subdir = os.path.join(
+            self.workbench.dir, 'conversions')
+        os.mkdir(self.conversions_subdir)
 
-    queued_filepath = entry.queued_media_file
-    queued_filename = workbench.localized_file(
-        mgg.queue_store, queued_filepath,
-        'source')
+        # Pull down and set up the original file
+        self.orig_filename = get_orig_filename(
+            self.entry, self.workbench)
+        self.name_builder = FilenameBuilder(self.orig_filename)
 
-    queued_file = file(queued_filename, 'rb')
+        # Filled in later by _detect_charset
+        self.charset = None
 
-    with queued_file:
-        queued_file_charset = chardet.detect(queued_file.read())
+    def copy_original(self):
+        """
+        Pass the original file on to the module-level copy_original
+        helper, under a name built from the '{basename}{ext}' template.
+        """
+        target_name = self.name_builder.fill('{basename}{ext}')
+        copy_original(self.entry, self.orig_filename, target_name)
+
+    def _detect_charset(self, orig_file):
+        """
+        Guess the charset of orig_file and store the choice on
+        self.charset.
+
+        Falls back to 'utf-8' unless chardet reports a confidence of at
+        least 0.9. The file object is read to its end and is not rewound
+        here.
+        """
+        d_charset = chardet.detect(orig_file.read())
 
         # Only select a non-utf-8 charset if chardet is *really* sure
-        # Tested with "Feli\x0109an superjaron", which was detecte
-        if queued_file_charset['confidence'] < 0.9:
-            interpreted_charset = 'utf-8'
+        # Tested with "Feli\x0109an superjaron", which was detected
+        if d_charset['confidence'] < 0.9:
+            self.charset = 'utf-8'
         else:
-            interpreted_charset = queued_file_charset['encoding']
+            self.charset = d_charset['encoding']
 
         _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
-                queued_file_charset,
-                interpreted_charset))
-
-        queued_file.seek(0)  # Rewind the queued file
-
-        thumb_filepath = create_pub_filepath(
-            entry, 'thumbnail.png')
-
-        tmp_thumb_filename = os.path.join(
-            conversions_subdir, thumb_filepath[-1])
-
-        ascii_converter_args = {}
-
-        if ascii_config['thumbnail_font']:
-            ascii_converter_args.update(
-                    {'font': ascii_config['thumbnail_font']})
-
-        converter = asciitoimage.AsciiToImage(
-               **ascii_converter_args)
-
-        thumb = converter._create_image(
-            queued_file.read())
-
-        with file(tmp_thumb_filename, 'w') as thumb_file:
-            thumb.thumbnail(
-                (mgg.global_config['media:thumb']['max_width'],
-                 mgg.global_config['media:thumb']['max_height']),
-                Image.ANTIALIAS)
-            thumb.save(thumb_file)
-
-        _log.debug('Copying local file to public storage')
-        mgg.public_store.copy_local_to_storage(
-            tmp_thumb_filename, thumb_filepath)
-
-        queued_file.seek(0)
-
-        original_filepath = create_pub_filepath(entry, queued_filepath[-1])
-
-        with mgg.public_store.get_file(original_filepath, 'wb') \
-            as original_file:
-            original_file.write(queued_file.read())
-
-        queued_file.seek(0)  # Rewind *again*
-
-        unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')
-
-        with mgg.public_store.get_file(unicode_filepath, 'wb') \
-                as unicode_file:
-            # Decode the original file from its detected charset (or UTF8)
-            # Encode the unicode instance to ASCII and replace any non-ASCII
-            # with an HTML entity (&#
-            unicode_file.write(
-                unicode(queued_file.read().decode(
-                        interpreted_charset)).encode(
-                    'ascii',
-                    'xmlcharrefreplace'))
-
-    # Remove queued media file from storage and database.
-    # queued_filepath is in the task_id directory which should
-    # be removed too, but fail if the directory is not empty to be on
-    # the super-safe side.
-    mgg.queue_store.delete_file(queued_filepath)      # rm file
-    mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
-    entry.queued_media_file = []
-
-    media_files_dict = entry.setdefault('media_files', {})
-    media_files_dict['thumb'] = thumb_filepath
-    media_files_dict['unicode'] = unicode_filepath
-    media_files_dict['original'] = original_filepath
-
-    entry.save()
+                  d_charset,
+                  self.charset))
+
+    def store_unicode_file(self):
+        """
+        Store an ASCII-only rendition of the original in public storage.
+
+        Detects the charset of the original file, decodes the content
+        with it, re-encodes it as ASCII with every non-ASCII character
+        replaced by an XML character reference, writes the result to
+        'ascii-portable.txt' and records that path as the entry's
+        'unicode' media file.
+        """
+        with open(self.orig_filename, 'rb') as orig_file:
+            self._detect_charset(orig_file)
+            # Charset detection reads the file to its end; rewind so the
+            # read below sees the content instead of an empty string.
+            orig_file.seek(0)
+
+            unicode_filepath = create_pub_filepath(self.entry,
+                                                   'ascii-portable.txt')
+
+            with mgg.public_store.get_file(unicode_filepath, 'wb') \
+                    as unicode_file:
+                # Decode the original file from its detected charset (or UTF8)
+                # Encode the unicode instance to ASCII and replace any
+                # non-ASCII with an HTML entity (&#NNN;)
+                unicode_file.write(
+                    orig_file.read().decode(self.charset).encode(
+                        'ascii', 'xmlcharrefreplace'))
+
+        self.entry.media_files['unicode'] = unicode_filepath
+
+    def generate_thumb(self, font=None, thumb_size=None):
+        """
+        Render the original text as a thumbnail image and store it.
+
+        :param font: font handed to AsciiToImage; when falsy, the
+            'thumbnail_font' setting of the ascii config is used, if any.
+        :param thumb_size: (max_width, max_height) bound for the
+            thumbnail; when falsy, the 'media:thumb' values of the global
+            config are used.
+        """
+        # If no font kwarg, check config
+        if not font:
+            font = self.ascii_config.get('thumbnail_font', None)
+        if not thumb_size:
+            thumb_size = (mgg.global_config['media:thumb']['max_width'],
+                          mgg.global_config['media:thumb']['max_height'])
+
+        # One name for both the temporary and the public file, so the
+        # extension always matches the format the image is saved in.
+        thumb_name = self.name_builder.fill('{basename}.thumbnail.png')
+        tmp_thumb = os.path.join(self.conversions_subdir, thumb_name)
+
+        # If there is a font from either the config or kwarg, pass that
+        # font (not unconditionally the configured one) to the converter
+        ascii_converter_args = {}
+        if font:
+            ascii_converter_args['font'] = font
+
+        converter = asciitoimage.AsciiToImage(
+            **ascii_converter_args)
+
+        with open(self.orig_filename, 'rb') as orig_file:
+            thumb = converter._create_image(
+                orig_file.read())
+
+        thumb.thumbnail(
+            thumb_size,
+            Image.ANTIALIAS)
+
+        # Image data is binary; text mode would corrupt it on platforms
+        # that translate line endings.
+        with open(tmp_thumb, 'wb') as thumb_file:
+            thumb.save(thumb_file)
+
+        _log.debug('Copying local file to public storage')
+        store_public(self.entry, 'thumb', tmp_thumb, thumb_name)
+
+
+class InitialProcessor(CommonAsciiProcessor):
+    """
+    Initial processing step for new ascii media
+    """
+    name = "initial"
+    description = "Initial processing"
+
+    @classmethod
+    def media_is_eligible(cls, entry=None, state=None):
+        """
+        Return True when the media is in a state this step handles.
+
+        :param entry: media entry whose ``state`` is consulted when no
+            explicit state is given.
+        :param state: state name to check instead of ``entry.state``.
+        """
+        if not state:
+            state = entry.state
+        return state in (
+            "unprocessed", "failed")
+
+    @classmethod
+    def generate_parser(cls):
+        """
+        Build the argparse parser for this step's command line options
+        (--thumb_size and --font).
+        """
+        parser = argparse.ArgumentParser(
+            description=cls.description,
+            prog=cls.name)
+
+        parser.add_argument(
+            '--thumb_size',
+            nargs=2,
+            metavar=('max_width', 'max_height'),
+            type=int)
+
+        parser.add_argument(
+            '--font',
+            help='the thumbnail font')
+
+        return parser
+
+    @classmethod
+    def args_to_request(cls, args):
+        """
+        Turn parsed arguments into a request via request_from_args,
+        keeping the 'thumb_size' and 'font' values.
+        """
+        return request_from_args(
+            args, ['thumb_size', 'font'])
+
+    def process(self, thumb_size=None, font=None):
+        """
+        Run the initial processing: set up, store the unicode file,
+        generate the thumbnail, copy the original and delete the queue
+        file.
+
+        :param thumb_size: optional thumbnail size passed on to
+            generate_thumb.
+        :param font: optional thumbnail font passed on to generate_thumb.
+        """
+        self.common_setup()
+        self.store_unicode_file()
+        self.generate_thumb(thumb_size=thumb_size, font=font)
+        self.copy_original()
+        # delete_queue_file is not defined in this file; presumably
+        # inherited from MediaProcessor -- confirm.
+        self.delete_queue_file()
+
+
+class AsciiProcessingManager(ProcessingManager):
+    """
+    Processing manager for ascii media; registers InitialProcessor.
+    """
+    def __init__(self):
+        # Name the class explicitly: super(self.__class__, self) recurses
+        # forever as soon as this class is subclassed.
+        super(AsciiProcessingManager, self).__init__()
+        self.add_processor(InitialProcessor)