mediagoblin/media_types/image/processing.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 try:
  18     from PIL import Image
  19 except ImportError:
  20     import Image
  21 import os
  22 import logging
  23 import argparse
  24
  25 from mediagoblin import mg_globals as mgg
  26 from mediagoblin.db.models import MediaEntry
  27 from mediagoblin.processing import (
  28     BadMediaFail, FilenameBuilder,
  29     MediaProcessor, ProcessingManager,
  30     request_from_args, get_orig_filename,
  31     store_public, copy_original)
  32 from mediagoblin.submit.lib import run_process_media
  33 from mediagoblin.tools.exif import exif_fix_image_orientation, \
  34     extract_exif, clean_exif, get_gps_data, get_useful, \
  35     exif_image_needs_rotation
  36 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
  37
  38 _log = logging.getLogger(__name__)
  39
# Maps the upper-cased ``resize_filter`` config value to the PIL filter
# constant that resize_image() hands to ``Image.thumbnail()``.
PIL_FILTERS = {
    'NEAREST': Image.NEAREST,
    'BILINEAR': Image.BILINEAR,
    'BICUBIC': Image.BICUBIC,
    'ANTIALIAS': Image.ANTIALIAS}

# Identifier of this media type; sniff_handler() returns it on a match.
MEDIA_TYPE = 'mediagoblin.media_types.image'
  47
  48
  49 def resize_image(entry, resized, keyname, target_name, new_size,
  50                  exif_tags, workdir):
  51     """
  52     Store a resized version of an image and return its pathname.
  53
  54     Arguments:
  55     proc_state -- the processing state for the image to resize
  56     resized -- an image from Image.open() of the original image being resized
  57     keyname -- Under what key to save in the db.
  58     target_name -- public file path for the new resized image
  59     exif_tags -- EXIF data for the original image
  60     workdir -- directory path for storing converted image files
  61     new_size -- 2-tuple size for the resized image
  62     """
  63     config = mgg.global_config['media_type:mediagoblin.media_types.image']
  64
  65     resized = exif_fix_image_orientation(resized, exif_tags)  # Fix orientation
  66
  67     filter_config = config['resize_filter']
  68     try:
  69         resize_filter = PIL_FILTERS[filter_config.upper()]
  70     except KeyError:
  71         raise Exception('Filter "{0}" not found, choose one of {1}'.format(
  72             unicode(filter_config),
  73             u', '.join(PIL_FILTERS.keys())))
  74
  75     resized.thumbnail(new_size, resize_filter)
  76
  77     # Copy the new file to the conversion subdir, then remotely.
  78     tmp_resized_filename = os.path.join(workdir, target_name)
  79     with file(tmp_resized_filename, 'w') as resized_file:
  80         resized.save(resized_file, quality=config['quality'])
  81     store_public(entry, keyname, tmp_resized_filename, target_name)
  82
  83
  84 def resize_tool(entry,
  85                 force, keyname, target_name,
  86                 conversions_subdir, exif_tags, new_size=None):
  87     # filename -- the filename of the original image being resized
  88     filename = target_name
  89
  90     # Use the default size if new_size was not given
  91     if not new_size:
  92         max_width = mgg.global_config['media:' + keyname]['max_width']
  93         max_height = mgg.global_config['media:' + keyname]['max_height']
  94         new_size = (max_width, max_height)
  95
  96     # If the size of the original file exceeds the specified size for the desized
  97     # file, a target_name file is created and later associated with the media
  98     # entry.
  99     # Also created if the file needs rotation, or if forced.
 100     try:
 101         im = Image.open(filename)
 102     except IOError:
 103         raise BadMediaFail()
 104     if force \
 105         or im.size[0] > new_size[0]\
 106         or im.size[1] > new_size[1]\
 107         or exif_image_needs_rotation(exif_tags):
 108         resize_image(
 109             entry, im, unicode(keyname), target_name,
 110             new_size,
 111             exif_tags, conversions_subdir)
 112
 113
# Lower-case file extensions (without the dot) that sniff_handler() accepts.
SUPPORTED_FILETYPES = ['png', 'gif', 'jpg', 'jpeg', 'tiff']
 115
 116
 117 def sniff_handler(media_file, **kw):
 118     _log.info('Sniffing {0}'.format(MEDIA_TYPE))
 119     if kw.get('media') is not None:  # That's a double negative!
 120         name, ext = os.path.splitext(kw['media'].filename)
 121         clean_ext = ext[1:].lower()  # Strip the . from ext and make lowercase
 122
 123         if clean_ext in SUPPORTED_FILETYPES:
 124             _log.info('Found file extension in supported filetypes')
 125             return MEDIA_TYPE
 126         else:
 127             _log.debug('Media present, extension not found in {0}'.format(
 128                     SUPPORTED_FILETYPES))
 129     else:
 130         _log.warning('Need additional information (keyword argument \'media\')'
 131                      ' to be able to handle sniffing')
 132
 133     return None
 134
 135
 136 class ProcessImage(object):
 137     """Code to process an image. Will be run by celery.
 138
 139     A Workbench() represents a local tempory dir. It is automatically
 140     cleaned up when this function exits.
 141     """
 142     def __init__(self, proc_state=None):
 143         if proc_state:
 144             self.proc_state = proc_state
 145             self.entry = proc_state.entry
 146             self.workbench = proc_state.workbench
 147
 148             # Conversions subdirectory to avoid collisions
 149             self.conversions_subdir = os.path.join(
 150                 self.workbench.dir, 'convirsions')
 151
 152             self.orig_filename = proc_state.get_orig_filename()
 153             self.name_builder = FilenameBuilder(self.orig_filename)
 154
 155             # Exif extraction
 156             self.exif_tags = extract_exif(self.orig_filename)
 157
 158             os.mkdir(self.conversions_subdir)
 159
 160     def reprocess_action(self, args):
 161         """
 162         List the available actions for media in a given state
 163         """
 164         if args[0].state == 'processed':
 165             print _('\n Available reprocessing actions for processed images:'
 166                     '\n \t --resize: thumb or medium'
 167                     '\n Options:'
 168                     '\n \t --size: max_width max_height (defaults to'
 169                     'config specs)')
 170             return True
 171
 172     def _parser(self, args):
 173         """
 174         Parses the unknown args from the gmg parser
 175         """
 176         parser = argparse.ArgumentParser()
 177         parser.add_argument(
 178             '--resize',
 179             choices=['thumb', 'medium'])
 180         parser.add_argument(
 181             '--size',
 182             nargs=2,
 183             metavar=('max_width', 'max_height'),
 184             type=int)
 185         parser.add_argument(
 186             '--initial_processing',
 187             action='store_true')
 188
 189         return parser.parse_args(args[1])
 190
 191     def _check_eligible(self, entry_args, reprocess_args):
 192         """
 193         Check to see if we can actually process the given media as requested
 194         """
 195
 196         if entry_args.state == 'processed':
 197             if reprocess_args.initial_processing:
 198                 raise Exception(_('You can not run --initial_processing on'
 199                                   ' media that has already been processed.'))
 200
 201         if entry_args.state == 'failed':
 202             if reprocess_args.resize:
 203                 raise Exception(_('You can not run --resize on media that has'
 204                                   ' not been processed.'))
 205             if reprocess_args.size:
 206                 _log.warn('With --initial_processing, the --size flag will be'
 207                           ' ignored.')
 208
 209         if entry_args.state == 'processing':
 210             raise Exception(_('We currently do not support reprocessing on'
 211                               ' media that is in the "processing" state.'))
 212
 213     def initial_processing(self):
 214         # Is there any GPS data
 215         gps_data = get_gps_data(self.exif_tags)
 216
 217          # Always create a small thumbnail
 218         resize_tool(self.proc_state, True, 'thumb', self.orig_filename,
 219                     self.name_builder.fill('{basename}.thumbnail{ext}'),
 220                     self.conversions_subdir, self.exif_tags)
 221
 222         # Possibly create a medium
 223         resize_tool(self.proc_state, False, 'medium', self.orig_filename,
 224                     self.name_builder.fill('{basename}.medium{ext}'),
 225                     self.conversions_subdir, self.exif_tags)
 226
 227         # Copy our queued local workbench to its final destination
 228         self.proc_state.copy_original(self.name_builder.fill('{basename}{ext}'))
 229
 230         # Remove queued media file from storage and database
 231         self.proc_state.delete_queue_file()
 232
 233         # Insert exif data into database
 234         exif_all = clean_exif(self.exif_tags)
 235
 236         if len(exif_all):
 237             self.entry.media_data_init(exif_all=exif_all)
 238
 239         if len(gps_data):
 240             for key in list(gps_data.keys()):
 241                 gps_data['gps_' + key] = gps_data.pop(key)
 242             self.entry.media_data_init(**gps_data)
 243
 244     def reprocess(self, reprocess_info):
 245         """
 246         This function actually does the reprocessing when called by
 247         ProcessMedia in gmg/processing/task.py
 248         """
 249         new_size = None
 250
 251         # Did they specify a size? They must specify either both or none, so
 252         # we only need to check if one is present
 253         if reprocess_info.get('max_width'):
 254             max_width = reprocess_info['max_width']
 255             max_height = reprocess_info['max_height']
 256
 257             new_size = (max_width, max_height)
 258
 259         resize_tool(self.proc_state, False, reprocess_info['resize'],
 260                     self.name_builder.fill('{basename}.medium{ext}'),
 261                     self.conversions_subdir, self.exif_tags, new_size)
 262
 263     def media_reprocess(self, args):
 264         """
 265         This function handles the all of the reprocessing logic, before calling
 266         gmg/submit/lib/run_process_media
 267         """
 268         reprocess_args = self._parser(args)
 269         entry_args = args[0]
 270
 271         # Can we actually process the given media as requested?
 272         self._check_eligible(entry_args, reprocess_args)
 273
 274         # Do we want to re-try initial processing?
 275         if reprocess_args.initial_processing:
 276             for id in entry_args.media_id:
 277                 entry = MediaEntry.query.filter_by(id=id).first()
 278                 run_process_media(entry)
 279
 280         # Are we wanting to resize the thumbnail or medium?
 281         elif reprocess_args.resize:
 282
 283             # reprocess all given media entries
 284             for id in entry_args.media_id:
 285                 entry = MediaEntry.query.filter_by(id=id).first()
 286
 287                 # For now we can only reprocess with the original file
 288                 if not entry.media_files.get('original'):
 289                     raise Exception(_('The original file for this media entry'
 290                                       ' does not exist.'))
 291
 292                 reprocess_info = self._get_reprocess_info(reprocess_args)
 293                 run_process_media(entry, reprocess_info=reprocess_info)
 294
 295         # If we are here, they forgot to tell us how to reprocess
 296         else:
 297             _log.warn('You must set either --resize or --initial_processing'
 298                       ' flag to reprocess an image.')
 299
 300     def _get_reprocess_info(self, args):
 301         """ Returns a dict with the info needed for reprocessing"""
 302         reprocess_info = {'resize': args.resize}
 303
 304         if args.size:
 305             reprocess_info['max_width'] = args.size[0]
 306             reprocess_info['max_height'] = args.size[1]
 307
 308         return reprocess_info
 309
 310
 311 class CommonImageProcessor(MediaProcessor):
 312     """
 313     Provides a base for various media processing steps
 314     """
 315     # Common resizing step
 316     def resize_step(self):
 317         pass
 318
 319     @classmethod
 320     def _add_width_height_args(cls, parser):
 321         parser.add_argument(
 322             "--width", default=None,
 323             help=(
 324                 "Width of the resized image (if not using defaults)"))
 325         parser.add_argument(
 326             "--height", default=None,
 327             help=(
 328                 "Height of the resized image (if not using defaults)"))
 329
 330     def common_setup(self):
 331         """
 332         Set up the workbench directory and pull down the original file
 333         """
 334         ## @@: Should this be two functions?
 335         # Conversions subdirectory to avoid collisions
 336         self.conversions_subdir = os.path.join(
 337             self.workbench.dir, 'convirsions')
 338         os.mkdir(self.conversions_subdir)
 339
 340         # Pull down and set up the original file
 341         self.orig_filename = get_orig_filename(
 342             self.entry, self.workbench)
 343         self.name_builder = FilenameBuilder(self.orig_filename)
 344
 345         # Exif extraction
 346         self.exif_tags = extract_exif(self.orig_filename)
 347
 348
 349     def generate_medium_if_applicable(self, size=None):
 350         resize_tool(self.entry, False, 'medium', self.orig_filename,
 351                     self.name_builder.fill('{basename}.medium{ext}'),
 352                     self.conversions_subdir, self.exif_tags)
 353
 354     def generate_thumb(self, size=None):
 355         resize_tool(self.entry, True, 'thumb', self.orig_filename,
 356                     self.name_builder.fill('{basename}.thumbnail{ext}'),
 357                     self.conversions_subdir, self.exif_tags)
 358
 359     def copy_original(self):
 360         copy_original(
 361             self.entry, self.orig_filename,
 362             self.name_builder.fill('{basename}{ext}'))
 363
 364     def extract_metadata(self):
 365         # Is there any GPS data
 366         gps_data = get_gps_data(self.exif_tags)
 367
 368         # Insert exif data into database
 369         exif_all = clean_exif(self.exif_tags)
 370
 371         if len(exif_all):
 372             self.entry.media_data_init(exif_all=exif_all)
 373
 374         if len(gps_data):
 375             for key in list(gps_data.keys()):
 376                 gps_data['gps_' + key] = gps_data.pop(key)
 377             self.entry.media_data_init(**gps_data)
 378
 379
 380 class InitialProcessor(CommonImageProcessor):
 381     """
 382     Initial processing step for new images
 383     """
 384     name = "initial"
 385     description = "Initial processing"
 386
 387     @classmethod
 388     def media_is_eligible(cls, entry):
 389         """
 390         Determine if this media type is eligible for processing
 391         """
 392         return entry.state in (
 393             "unprocessed", "failed")
 394
 395     ###############################
 396     # Command line interface things
 397     ###############################
 398
 399     @classmethod
 400     def generate_parser(cls):
 401         parser = argparse.ArgumentParser(
 402             description=cls.description,
 403             prog=cls.name)
 404
 405         parser.add_argument(
 406             '--size',
 407             nargs=2,
 408             metavar=('max_width', 'max_height'),
 409             type=int)
 410
 411         parser.add_argument(
 412             '--thumb-size',
 413             nargs=2,
 414             type=int)
 415
 416         return parser
 417
 418     @classmethod
 419     def args_to_request(cls, args):
 420         return request_from_args(
 421             args, ['size', 'thumb_size'])
 422
 423
 424     def process(self, size=None, thumb_size=None):
 425         self.common_setup()
 426         self.generate_medium_if_applicable(size=size)
 427         self.generate_thumb(size=thumb_size)
 428         self.extract_metadata()
 429
 430
 431 class ImageProcessingManager(ProcessingManager):
 432     def __init__(self):
 433         super(self.__class__, self).__init__()
 434         self.add_processor(InitialProcessor)
 435
 436
 437 if __name__ == '__main__':
 438     import sys
 439     import pprint
 440
 441     pp = pprint.PrettyPrinter()
 442
 443     result = extract_exif(sys.argv[1])
 444     gps = get_gps_data(result)
 445     clean = clean_exif(result)
 446     useful = get_useful(clean)
 447
 448     print pp.pprint(
 449         clean)