From: Odin Hørthe Omdal <odin.omdal@gmail.com>
Date: Wed, 26 Dec 2012 15:16:33 +0000 (+0100)
Subject: Bug #584, upgrade EXIF.py from master to read Nikon data
X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=6e60238b6c182ce60ac1d929f41b1b92ac7d25d7;p=mediagoblin.git

Bug #584, upgrade EXIF.py from master to read Nikon data
---

diff --git a/extlib/exif/EXIF.py b/extlib/exif/EXIF.py
index b777db8f..a188154e 100755
--- a/extlib/exif/EXIF.py
+++ b/extlib/exif/EXIF.py
@@ -1,10 +1,12 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
-# Library to extract EXIF information from digital camera image files
-# http://sourceforge.net/projects/exif-py/
 #
-# VERSION 1.0.10
+# Library to extract EXIF information from digital camera image files.
+# https://github.com/ianare/exif-py
+#
+#
+# VERSION 1.1.0
 #
 # To use this library call with:
 #    f = open(path_name, 'rb')
@@ -22,7 +24,6 @@
 #
 # These 2 are useful when you are retrieving a large list of images
 #
-#
 # To return an error on invalid tags,
 # pass the -s or --strict argument, or as
 #    tags = EXIF.process_file(f, strict=True)
@@ -48,7 +49,7 @@
 # 'EXIF DateTimeOriginal', 'Image Orientation', 'MakerNote FocusMode'
 #
 # Copyright (c) 2002-2007 Gene Cash All rights reserved
-# Copyright (c) 2007-2008 IanarÃ© SÃ©vi All rights reserved
+# Copyright (c) 2007-2012 IanarÃ© SÃ©vi All rights reserved
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -102,7 +103,7 @@ def make_string_uc(seq):
     seq = seq[8:]
     # Of course, this is only correct if ASCII, and the standard explicitly
     # allows JIS and Unicode.
-    return make_string(seq)
+    return make_string( make_string(seq) )
 
 # field type descriptions as (length, abbreviation, full name) tuples
 FIELD_TYPES = (
@@ -1248,10 +1249,17 @@ class IFD_Tag:
         return self.printable
 
     def __repr__(self):
-        return '(0x%04X) %s=%s @ %d' % (self.tag,
+        try:
+            s= '(0x%04X) %s=%s @ %d' % (self.tag,
                                         FIELD_TYPES[self.field_type][2],
                                         self.printable,
                                         self.field_offset)
+        except:
+            s= '(%s) %s=%s @ %s' % (str(self.tag),
+                                        FIELD_TYPES[self.field_type][2],
+                                        self.printable,
+                                        str(self.field_offset))
+        return s
 
 # class that handles an EXIF header
 class EXIF_header:
@@ -1369,14 +1377,15 @@ class EXIF_header:
                     # special case: null-terminated ASCII string
                     # XXX investigate
                     # sometimes gets too big to fit in int value
-                    if count != 0 and count < (2**31):
-                        self.file.seek(self.offset + offset)
-                        values = self.file.read(count)
-                        #print values
-                        # Drop any garbage after a null.
-                        values = values.split('\x00', 1)[0]
-                    else:
-                        values = ''
+                    if count != 0: # and count < (2**31):  # 2E31 is hardware dependant. --gd
+                        try:
+                            self.file.seek(self.offset + offset)
+                            values = self.file.read(count)
+                            #print values
+                            # Drop any garbage after a null.
+                            values = values.split('\x00', 1)[0]
+                        except OverflowError:
+                            values = ''
                 else:
                     values = []
                     signed = (field_type in [6, 8, 9, 10])
@@ -1635,21 +1644,118 @@ def process_file(f, stop_tag='UNDEF', details=True, strict=False, debug=False):
         offset = 0
     elif data[0:2] == '\xFF\xD8':
         # it's a JPEG file
+        if debug: print "JPEG format recognized data[0:2] == '0xFFD8'."
+        base = 2
         while data[2] == '\xFF' and data[6:10] in ('JFIF', 'JFXX', 'OLYM', 'Phot'):
+            if debug: print "data[2] == 0xxFF data[3]==%x and data[6:10] = %s"%(ord(data[3]),data[6:10])
             length = ord(data[4])*256+ord(data[5])
+            if debug: print "Length offset is",length
             f.read(length-8)
             # fake an EXIF beginning of file
+            # I don't think this is used. --gd
             data = '\xFF\x00'+f.read(10)
             fake_exif = 1
-        if data[2] == '\xFF' and data[6:10] == 'Exif':
+            if base>2: 
+                if debug: print "added to base "
+                base = base + length + 4 -2
+            else: 
+                if debug: print "added to zero "
+                base = length + 4
+            if debug: print "Set segment base to",base
+
+        # Big ugly patch to deal with APP2 (or other) data coming before APP1
+        f.seek(0)
+        data = f.read(base+4000) # in theory, this could be insufficient since 64K is the maximum size--gd
+        # base = 2
+        while 1:
+            if debug: print "Segment base 0x%X" % base
+            if data[base:base+2]=='\xFF\xE1':
+                # APP1
+                if debug: print "APP1 at base",hex(base)
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if data[base+4:base+8] == "Exif":
+                    if debug: print "Decrement base by",2,"to get to pre-segment header (for compatibility with later code)"
+                    base = base-2
+                    break
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            elif data[base:base+2]=='\xFF\xE0':
+                # APP0
+                if debug: print "APP0 at base",hex(base)
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            elif data[base:base+2]=='\xFF\xE2':
+                # APP2
+                if debug: print "APP2 at base",hex(base)
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            elif data[base:base+2]=='\xFF\xEE':
+                # APP14
+                if debug: print "APP14 Adobe segment at base",hex(base)
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                print "There is useful EXIF-like data here, but we have no parser for it."
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            elif data[base:base+2]=='\xFF\xDB':
+                if debug: print "JPEG image data at base",hex(base),"No more segments are expected."
+                # sys.exit(0)
+                break
+            elif data[base:base+2]=='\xFF\xD8':
+                # APP12
+                if debug: print "FFD8 segment at base",hex(base)
+                if debug: print "Got",hex(ord(data[base])), hex(ord(data[base+1])),"and", data[4+base:10+base], "instead."
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            elif data[base:base+2]=='\xFF\xEC':
+                # APP12
+                if debug: print "APP12 XMP (Ducky) or Pictureinfo segment at base",hex(base)
+                if debug: print "Got",hex(ord(data[base])), hex(ord(data[base+1])),"and", data[4+base:10+base], "instead."
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                print "There is useful EXIF-like data here (quality, comment, copyright), but we have no parser for it."
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            else: 
+                try:
+                    if debug: print "Unexpected/unhandled segment type or file content."
+                    if debug: print "Got",hex(ord(data[base])), hex(ord(data[base+1])),"and", data[4+base:10+base], "instead."
+                    if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                except: pass
+                try: base=base+ord(data[base+2])*256+ord(data[base+3])+2
+                except: pass
+
+        f.seek(base+12)
+        if data[2+base] == '\xFF' and data[6+base:10+base] == 'Exif':
             # detected EXIF header
             offset = f.tell()
             endian = f.read(1)
+            #HACK TEST:  endian = 'M'
+        elif data[2+base] == '\xFF' and data[6+base:10+base+1] == 'Ducky':
+            # detected Ducky header.
+            if debug: print "EXIF-like header (normally 0xFF and code):",hex(ord(data[2+base])) , "and", data[6+base:10+base+1]
+            offset = f.tell()
+            endian = f.read(1)
+        elif data[2+base] == '\xFF' and data[6+base:10+base+1] == 'Adobe':
+            # detected APP14 (Adobe)
+            if debug: print "EXIF-like header (normally 0xFF and code):",hex(ord(data[2+base])) , "and", data[6+base:10+base+1]
+            offset = f.tell()
+            endian = f.read(1)
         else:
             # no EXIF information
+            if debug: print "No EXIF header expected data[2+base]==0xFF and data[6+base:10+base]===Exif (or Duck)"
+            if debug: print " but got",hex(ord(data[2+base])) , "and", data[6+base:10+base+1]
             return {}
     else:
         # file format not recognized
+        if debug: print "file format not recognized"
         return {}
 
     # deal with the EXIF info we found
diff --git a/extlib/exif/changes.txt b/extlib/exif/changes.txt
index 887d84fd..d1b18e6c 100644
--- a/extlib/exif/changes.txt
+++ b/extlib/exif/changes.txt
@@ -1,5 +1,10 @@
 ï»¿~ EXIF.py Changelog ~
 
+2012-11-30 - Gregory Dudek (date of merge).
+Patches and changes:
+  Overflow error fixes added (related to 2**31 size)
+  GPS tags added.
+
 2012-09-26 - IanarÃ© SÃ©vi
 Merge patches:
   Add GPS tags