Media type refactors, pep8, lint
[mediagoblin.git] / mediagoblin / media_types / image / processing.py
index 57eb75dbb98232fab6128f714656a9ebfab68cee..bacfecb85e820c25fd57f16eccd2167315f64f7b 100644 (file)
@@ -1,5 +1,5 @@
 # GNU MediaGoblin -- federated, autonomous media hosting
-# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import Image
+import os
+import logging
 
-from celery.task import Task
-from celery import registry
-
-from mediagoblin.db.util import ObjectId
 from mediagoblin import mg_globals as mgg
+from mediagoblin.processing import BadMediaFail, \
+    create_pub_filepath
+from mediagoblin.tools.exif import exif_fix_image_orientation, \
+    extract_exif, clean_exif, get_gps_data, get_useful
 
-from mediagoblin.util import lazy_pass_to_ugettext as _
-
-from mediagoblin.process_media.errors import *
-
-THUMB_SIZE = 180, 180
-MEDIUM_SIZE = 640, 640
-
-
-def create_pub_filepath(entry, filename):
-    return mgg.public_store.get_unique_filepath(
-            ['media_entries',
-             unicode(entry['_id']),
-             filename])
-
-################################
-# Media processing initial steps
-################################
-
-class ProcessMedia(Task):
-    """
-    Pass this entry off for processing.
-    """
-    def run(self, media_id):
-        """
-        Pass the media entry off to the appropriate processing function
-        (for now just process_image...)
-        """
-        entry = mgg.database.MediaEntry.one(
-            {'_id': ObjectId(media_id)})
-
-        # Try to process, and handle expected errors.
-        try:
-            process_image(entry)
-        except BaseProcessingFail, exc:
-            mark_entry_failed(entry[u'_id'], exc)
-            return
-
-        entry['state'] = u'processed'
-        entry.save()
-
-    def on_failure(self, exc, task_id, args, kwargs, einfo):
-        """
-        If the processing failed we should mark that in the database.
+_log = logging.getLogger(__name__)
 
-        Assuming that the exception raised is a subclass of BaseProcessingFail,
-        we can use that to get more information about the failure and store that
-        for conveying information to users about the failure, etc.
-        """
-        entry_id = args[0]
-        mark_entry_failed(entry_id, exc)
+SUPPORTED_FILETYPES = ['png', 'gif', 'jpg', 'jpeg']
 
 
-process_media = registry.tasks[ProcessMedia.name]
-
-
-def mark_entry_failed(entry_id, exc):
-    """
-    Mark a media entry as having failed in its conversion.
-
-    Uses the exception that was raised to mark more information.  If the
-    exception is a derivative of BaseProcessingFail then we can store extra
-    information that can be useful for users telling them why their media failed
-    to process.
+def sniff_handler(media_file, **kw):
+    if kw.get('media') is not None:  # That's a double negative!
+        name, ext = os.path.splitext(kw['media'].filename)
+        clean_ext = ext[1:].lower()  # Strip the . from ext and make lowercase
 
-    Args:
-     - entry_id: The id of the media entry
+        _log.debug('name: {0}\next: {1}\nlower_ext: {2}'.format(
+                name,
+                ext,
+                clean_ext))
 
-    """
-    # Was this a BaseProcessingFail?  In other words, was this a
-    # type of error that we know how to handle?
-    if isinstance(exc, BaseProcessingFail):
-        # Looks like yes, so record information about that failure and any
-        # metadata the user might have supplied.
-        mgg.database['media_entries'].update(
-            {'_id': entry_id},
-            {'$set': {u'state': u'failed',
-                      u'fail_error': exc.exception_path,
-                      u'fail_metadata': exc.metadata}})
+        if clean_ext in SUPPORTED_FILETYPES:
+            _log.info('Found file extension in supported filetypes')
+            return True
+        else:
+            _log.debug('Media present, extension not found in {0}'.format(
+                    SUPPORTED_FILETYPES))
     else:
-        # Looks like no, so just mark it as failed and don't record a
-        # failure_error (we'll assume it wasn't handled) and don't record
-        # metadata (in fact overwrite it if somehow it had previous info
-        # here)
-        mgg.database['media_entries'].update(
-            {'_id': entry_id},
-            {'$set': {u'state': u'failed',
-                      u'fail_error': None,
-                      u'fail_metadata': {}}})
+        _log.warning('Need additional information (keyword argument \'media\')'
+                     ' to be able to handle sniffing')
+
+    return False
 
 
 def process_image(entry):
@@ -117,64 +57,119 @@ def process_image(entry):
     Code to process an image
     """
     workbench = mgg.workbench_manager.create_workbench()
+    # Conversions subdirectory to avoid collisions
+    conversions_subdir = os.path.join(
+        workbench.dir, 'conversions')
+    os.mkdir(conversions_subdir)
 
-    queued_filepath = entry['queued_media_file']
+    queued_filepath = entry.queued_media_file
     queued_filename = workbench.localized_file(
         mgg.queue_store, queued_filepath,
         'source')
 
+    filename_bits = os.path.splitext(queued_filename)
+    basename = os.path.split(filename_bits[0])[1]
+    extension = filename_bits[1].lower()
+
+    # EXIF extraction
+    exif_tags = extract_exif(queued_filename)
+    gps_data = get_gps_data(exif_tags)
+
     try:
         thumb = Image.open(queued_filename)
     except IOError:
         raise BadMediaFail()
 
-    thumb.thumbnail(THUMB_SIZE, Image.ANTIALIAS)
-    # ensure color mode is compatible with jpg
-    if thumb.mode != "RGB":
-        thumb = thumb.convert("RGB")
+    thumb = exif_fix_image_orientation(thumb, exif_tags)
 
-    thumb_filepath = create_pub_filepath(entry, 'thumbnail.jpg')
-    thumb_file = mgg.public_store.get_file(thumb_filepath, 'w')
+    thumb.thumbnail(
+        (mgg.global_config['media:thumb']['max_width'],
+         mgg.global_config['media:thumb']['max_height']),
+        Image.ANTIALIAS)
 
-    with thumb_file:
-        thumb.save(thumb_file, "JPEG", quality=90)
+    # Save the thumb to the conversions subdir, then copy it to public storage.
+    thumb_filename = 'thumbnail' + extension
+    thumb_filepath = create_pub_filepath(entry, thumb_filename)
+
+    tmp_thumb_filename = os.path.join(
+        conversions_subdir, thumb_filename)
+
+    with file(tmp_thumb_filename, 'w') as thumb_file:
+        thumb.save(thumb_file)
+
+    mgg.public_store.copy_local_to_storage(
+        tmp_thumb_filename, thumb_filepath)
 
     # If the size of the original file exceeds the specified size of a `medium`
     # file, a `medium.jpg` file is created and later associated with the media
     # entry.
     medium = Image.open(queued_filename)
-    medium_processed = False
 
-    if medium.size[0] > MEDIUM_SIZE[0] or medium.size[1] > MEDIUM_SIZE[1]:
-        medium.thumbnail(MEDIUM_SIZE, Image.ANTIALIAS)
+    # Fix orientation
+    medium = exif_fix_image_orientation(medium, exif_tags)
+
+    if medium.size[0] > mgg.global_config['media:medium']['max_width'] \
+        or medium.size[1] > mgg.global_config['media:medium']['max_height']:
+        medium.thumbnail(
+            (mgg.global_config['media:medium']['max_width'],
+             mgg.global_config['media:medium']['max_height']),
+            Image.ANTIALIAS)
 
-        if medium.mode != "RGB":
-            medium = medium.convert("RGB")
+    medium_filename = 'medium' + extension
+    medium_filepath = create_pub_filepath(entry, medium_filename)
 
-        medium_filepath = create_pub_filepath(entry, 'medium.jpg')
-        medium_file = mgg.public_store.get_file(medium_filepath, 'w')
+    tmp_medium_filename = os.path.join(
+        conversions_subdir, medium_filename)
 
-        with medium_file:
-            medium.save(medium_file, "JPEG", quality=90)
-            medium_processed = True
+    with file(tmp_medium_filename, 'w') as medium_file:
+        medium.save(medium_file)
+
+    mgg.public_store.copy_local_to_storage(
+        tmp_medium_filename, medium_filepath)
 
     # we have to re-read because unlike PIL, not everything reads
     # things in string representation :)
     queued_file = file(queued_filename, 'rb')
 
     with queued_file:
-        original_filepath = create_pub_filepath(entry, queued_filepath[-1])
+        #create_pub_filepath(entry, queued_filepath[-1])
+        original_filepath = create_pub_filepath(entry, basename + extension)
 
-        with mgg.public_store.get_file(original_filepath, 'wb') as original_file:
+        with mgg.public_store.get_file(original_filepath, 'wb') \
+            as original_file:
             original_file.write(queued_file.read())
 
+    # Remove queued media file from storage and database
     mgg.queue_store.delete_file(queued_filepath)
-    entry['queued_media_file'] = []
+    entry.queued_media_file = []
+
+    # Insert media file information into database
     media_files_dict = entry.setdefault('media_files', {})
     media_files_dict['thumb'] = thumb_filepath
     media_files_dict['original'] = original_filepath
-    if medium_processed:
-        media_files_dict['medium'] = medium_filepath
+    media_files_dict['medium'] = medium_filepath
+
+    # Insert exif data into database
+    media_data = entry.setdefault('media_data', {})
+    media_data['exif'] = {
+        'clean': clean_exif(exif_tags)}
+    media_data['exif']['useful'] = get_useful(
+        media_data['exif']['clean'])
+    media_data['gps'] = gps_data
 
     # clean up workbench
     workbench.destroy_self()
+
+if __name__ == '__main__':
+    import sys
+    import pprint
+
+    pp = pprint.PrettyPrinter()
+
+    result = extract_exif(sys.argv[1])
+    gps = get_gps_data(result)
+    clean = clean_exif(result)
+    useful = get_useful(clean)
+
+    print pp.pprint(
+        clean)