mediagoblin/media_types/ascii/processing.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16 import chardet
  17 import os
  18 try:
  19     from PIL import Image
  20 except ImportError:
  21     import Image
  22 import logging
  23
  24 from mediagoblin import mg_globals as mgg
  25 from mediagoblin.processing import create_pub_filepath
  26 from mediagoblin.media_types.ascii import asciitoimage
  27
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)

# File extensions (lowercase, without the leading dot) that sniff_handler
# accepts for this media type.
SUPPORTED_EXTENSIONS = ['txt', 'asc', 'nfo']
  31
  32
  33 def sniff_handler(media_file, **kw):
  34     if kw.get('media') is not None:
  35         name, ext = os.path.splitext(kw['media'].filename)
  36         clean_ext = ext[1:].lower()
  37
  38         if clean_ext in SUPPORTED_EXTENSIONS:
  39             return True
  40
  41     return False
  42
  43
  44 def process_ascii(proc_state):
  45     """Code to process a txt file. Will be run by celery.
  46
  47     A Workbench() represents a local tempory dir. It is automatically
  48     cleaned up when this function exits.
  49     """
  50     entry = proc_state.entry
  51     workbench = proc_state.workbench
  52     ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii']
  53     # Conversions subdirectory to avoid collisions
  54     conversions_subdir = os.path.join(
  55         workbench.dir, 'conversions')
  56     os.mkdir(conversions_subdir)
  57
  58     queued_filepath = entry.queued_media_file
  59     queued_filename = workbench.localized_file(
  60         mgg.queue_store, queued_filepath,
  61         'source')
  62
  63     queued_file = file(queued_filename, 'rb')
  64
  65     with queued_file:
  66         queued_file_charset = chardet.detect(queued_file.read())
  67
  68         # Only select a non-utf-8 charset if chardet is *really* sure
  69         # Tested with "Feli\x0109an superjaron", which was detecte
  70         if queued_file_charset['confidence'] < 0.9:
  71             interpreted_charset = 'utf-8'
  72         else:
  73             interpreted_charset = queued_file_charset['encoding']
  74
  75         _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
  76                 queued_file_charset,
  77                 interpreted_charset))
  78
  79         queued_file.seek(0)  # Rewind the queued file
  80
  81         thumb_filepath = create_pub_filepath(
  82             entry, 'thumbnail.png')
  83
  84         tmp_thumb_filename = os.path.join(
  85             conversions_subdir, thumb_filepath[-1])
  86
  87         ascii_converter_args = {}
  88
  89         if ascii_config['thumbnail_font']:
  90             ascii_converter_args.update(
  91                     {'font': ascii_config['thumbnail_font']})
  92
  93         converter = asciitoimage.AsciiToImage(
  94                **ascii_converter_args)
  95
  96         thumb = converter._create_image(
  97             queued_file.read())
  98
  99         with file(tmp_thumb_filename, 'w') as thumb_file:
 100             thumb.thumbnail(
 101                 (mgg.global_config['media:thumb']['max_width'],
 102                  mgg.global_config['media:thumb']['max_height']),
 103                 Image.ANTIALIAS)
 104             thumb.save(thumb_file)
 105
 106         _log.debug('Copying local file to public storage')
 107         mgg.public_store.copy_local_to_storage(
 108             tmp_thumb_filename, thumb_filepath)
 109
 110         queued_file.seek(0)
 111
 112         original_filepath = create_pub_filepath(entry, queued_filepath[-1])
 113
 114         with mgg.public_store.get_file(original_filepath, 'wb') \
 115             as original_file:
 116             original_file.write(queued_file.read())
 117
 118         queued_file.seek(0)  # Rewind *again*
 119
 120         unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')
 121
 122         with mgg.public_store.get_file(unicode_filepath, 'wb') \
 123                 as unicode_file:
 124             # Decode the original file from its detected charset (or UTF8)
 125             # Encode the unicode instance to ASCII and replace any non-ASCII
 126             # with an HTML entity (&#
 127             unicode_file.write(
 128                 unicode(queued_file.read().decode(
 129                         interpreted_charset)).encode(
 130                     'ascii',
 131                     'xmlcharrefreplace'))
 132
 133     # Remove queued media file from storage and database.
 134     # queued_filepath is in the task_id directory which should
 135     # be removed too, but fail if the directory is not empty to be on
 136     # the super-safe side.
 137     mgg.queue_store.delete_file(queued_filepath)      # rm file
 138     mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
 139     entry.queued_media_file = []
 140
 141     media_files_dict = entry.setdefault('media_files', {})
 142     media_files_dict['thumb'] = thumb_filepath
 143     media_files_dict['unicode'] = unicode_filepath
 144     media_files_dict['original'] = original_filepath
 145
 146     entry.save()