Bug #584, upgrade EXIF.py from master to read Nikon data

author Odin Hørthe Omdal <odin.omdal@gmail.com>

Wed, 26 Dec 2012 15:16:33 +0000 (16:16 +0100)

committer Joar Wandborg <git@wandborg.com>

Wed, 26 Dec 2012 17:42:37 +0000 (18:42 +0100)
author Odin Hørthe Omdal <odin.omdal@gmail.com>
Wed, 26 Dec 2012 15:16:33 +0000 (16:16 +0100)
committer Joar Wandborg <git@wandborg.com>
Wed, 26 Dec 2012 17:42:37 +0000 (18:42 +0100)
diff --git a/extlib/exif/EXIF.py b/extlib/exif/EXIF.py

index b777db8f2cf60ee84d244e9102bed3725746b57e..a188154e56bc12cbb38179f360197c803d54a3b1 100755 (executable)
--- a/extlib/exif/EXIF.py
+++ b/extlib/exif/EXIF.py
@@ -1,10 +1,12 @@
  #!/usr/bin/env python
  # -*- coding: utf-8 -*-
  #
-# Library to extract EXIF information from digital camera image files
-# http://sourceforge.net/projects/exif-py/
  #
-# VERSION 1.0.10
+# Library to extract EXIF information from digital camera image files.
+# https://github.com/ianare/exif-py
+#
+#
+# VERSION 1.1.0
  #
  # To use this library call with:
  #    f = open(path_name, 'rb')
@@ -22,7 +24,6 @@
  #
  # These 2 are useful when you are retrieving a large list of images
  #
-#
  # To return an error on invalid tags,
  # pass the -s or --strict argument, or as
  #    tags = EXIF.process_file(f, strict=True)
@@ -48,7 +49,7 @@
  # 'EXIF DateTimeOriginal', 'Image Orientation', 'MakerNote FocusMode'
  #
  # Copyright (c) 2002-2007 Gene Cash All rights reserved
-# Copyright (c) 2007-2008 Ianaré Sévi All rights reserved
+# Copyright (c) 2007-2012 Ianaré Sévi All rights reserved
  #
  # Redistribution and use in source and binary forms, with or without
  # modification, are permitted provided that the following conditions
@@ -102,7 +103,7 @@ def make_string_uc(seq):
      seq = seq[8:]
      # Of course, this is only correct if ASCII, and the standard explicitly
      # allows JIS and Unicode.
-    return make_string(seq)
+    return make_string( make_string(seq) )
  
  # field type descriptions as (length, abbreviation, full name) tuples
  FIELD_TYPES = (
@@ -1248,10 +1249,17 @@ class IFD_Tag:
          return self.printable
  
      def __repr__(self):
-        return '(0x%04X) %s=%s @ %d' % (self.tag,
+        try:
+            s= '(0x%04X) %s=%s @ %d' % (self.tag,
                                          FIELD_TYPES[self.field_type][2],
                                          self.printable,
                                          self.field_offset)
+        except:
+            s= '(%s) %s=%s @ %s' % (str(self.tag),
+                                        FIELD_TYPES[self.field_type][2],
+                                        self.printable,
+                                        str(self.field_offset))
+        return s
  
  # class that handles an EXIF header
  class EXIF_header:
@@ -1369,14 +1377,15 @@ class EXIF_header:
                      # special case: null-terminated ASCII string
                      # XXX investigate
                      # sometimes gets too big to fit in int value
-                    if count != 0 and count < (2**31):
-                        self.file.seek(self.offset + offset)
-                        values = self.file.read(count)
-                        #print values
-                        # Drop any garbage after a null.
-                        values = values.split('\x00', 1)[0]
-                    else:
-                        values = ''
+                    if count != 0: # and count < (2**31):  # 2E31 is hardware dependant. --gd
+                        try:
+                            self.file.seek(self.offset + offset)
+                            values = self.file.read(count)
+                            #print values
+                            # Drop any garbage after a null.
+                            values = values.split('\x00', 1)[0]
+                        except OverflowError:
+                            values = ''
                  else:
                      values = []
                      signed = (field_type in [6, 8, 9, 10])
@@ -1635,21 +1644,118 @@ def process_file(f, stop_tag='UNDEF', details=True, strict=False, debug=False):
          offset = 0
      elif data[0:2] == '\xFF\xD8':
          # it's a JPEG file
+        if debug: print "JPEG format recognized data[0:2] == '0xFFD8'."
+        base = 2
          while data[2] == '\xFF' and data[6:10] in ('JFIF', 'JFXX', 'OLYM', 'Phot'):
+            if debug: print "data[2] == 0xxFF data[3]==%x and data[6:10] = %s"%(ord(data[3]),data[6:10])
              length = ord(data[4])*256+ord(data[5])
+            if debug: print "Length offset is",length
              f.read(length-8)
              # fake an EXIF beginning of file
+            # I don't think this is used. --gd
              data = '\xFF\x00'+f.read(10)
              fake_exif = 1
-        if data[2] == '\xFF' and data[6:10] == 'Exif':
+            if base>2: 
+                if debug: print "added to base "
+                base = base + length + 4 -2
+            else: 
+                if debug: print "added to zero "
+                base = length + 4
+            if debug: print "Set segment base to",base
+
+        # Big ugly patch to deal with APP2 (or other) data coming before APP1
+        f.seek(0)
+        data = f.read(base+4000) # in theory, this could be insufficient since 64K is the maximum size--gd
+        # base = 2
+        while 1:
+            if debug: print "Segment base 0x%X" % base
+            if data[base:base+2]=='\xFF\xE1':
+                # APP1
+                if debug: print "APP1 at base",hex(base)
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if data[base+4:base+8] == "Exif":
+                    if debug: print "Decrement base by",2,"to get to pre-segment header (for compatibility with later code)"
+                    base = base-2
+                    break
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            elif data[base:base+2]=='\xFF\xE0':
+                # APP0
+                if debug: print "APP0 at base",hex(base)
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            elif data[base:base+2]=='\xFF\xE2':
+                # APP2
+                if debug: print "APP2 at base",hex(base)
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            elif data[base:base+2]=='\xFF\xEE':
+                # APP14
+                if debug: print "APP14 Adobe segment at base",hex(base)
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                print "There is useful EXIF-like data here, but we have no parser for it."
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            elif data[base:base+2]=='\xFF\xDB':
+                if debug: print "JPEG image data at base",hex(base),"No more segments are expected."
+                # sys.exit(0)
+                break
+            elif data[base:base+2]=='\xFF\xD8':
+                # APP12
+                if debug: print "FFD8 segment at base",hex(base)
+                if debug: print "Got",hex(ord(data[base])), hex(ord(data[base+1])),"and", data[4+base:10+base], "instead."
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            elif data[base:base+2]=='\xFF\xEC':
+                # APP12
+                if debug: print "APP12 XMP (Ducky) or Pictureinfo segment at base",hex(base)
+                if debug: print "Got",hex(ord(data[base])), hex(ord(data[base+1])),"and", data[4+base:10+base], "instead."
+                if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3]))
+                if debug: print "Code",data[base+4:base+8]
+                if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                print "There is useful EXIF-like data here (quality, comment, copyright), but we have no parser for it."
+                base=base+ord(data[base+2])*256+ord(data[base+3])+2
+            else: 
+                try:
+                    if debug: print "Unexpected/unhandled segment type or file content."
+                    if debug: print "Got",hex(ord(data[base])), hex(ord(data[base+1])),"and", data[4+base:10+base], "instead."
+                    if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2
+                except: pass
+                try: base=base+ord(data[base+2])*256+ord(data[base+3])+2
+                except: pass
+
+        f.seek(base+12)
+        if data[2+base] == '\xFF' and data[6+base:10+base] == 'Exif':
              # detected EXIF header
              offset = f.tell()
              endian = f.read(1)
+            #HACK TEST:  endian = 'M'
+        elif data[2+base] == '\xFF' and data[6+base:10+base+1] == 'Ducky':
+            # detected Ducky header.
+            if debug: print "EXIF-like header (normally 0xFF and code):",hex(ord(data[2+base])) , "and", data[6+base:10+base+1]
+            offset = f.tell()
+            endian = f.read(1)
+        elif data[2+base] == '\xFF' and data[6+base:10+base+1] == 'Adobe':
+            # detected APP14 (Adobe)
+            if debug: print "EXIF-like header (normally 0xFF and code):",hex(ord(data[2+base])) , "and", data[6+base:10+base+1]
+            offset = f.tell()
+            endian = f.read(1)
          else:
              # no EXIF information
+            if debug: print "No EXIF header expected data[2+base]==0xFF and data[6+base:10+base]===Exif (or Duck)"
+            if debug: print " but got",hex(ord(data[2+base])) , "and", data[6+base:10+base+1]
              return {}
      else:
          # file format not recognized
+        if debug: print "file format not recognized"
          return {}
  
      # deal with the EXIF info we found
diff --git a/extlib/exif/changes.txt b/extlib/exif/changes.txt

index 887d84fdfe75294913a0916e7fa3058fd97cf361..d1b18e6c06070ff968b4e6c67a6ad249571b2713 100644 (file)
--- a/extlib/exif/changes.txt
+++ b/extlib/exif/changes.txt
@@ -1,5 +1,10 @@
  ~ EXIF.py Changelog ~
  
+2012-11-30 - Gregory Dudek (date of merge).
+Patches and changes:
+  Overflow error fixes added (related to 2**31 size)
+  GPS tags added.
+
  2012-09-26 - Ianaré Sévi
  Merge patches:
    Add GPS tags
author	Odin Hørthe Omdal <odin.omdal@gmail.com>
	Wed, 26 Dec 2012 15:16:33 +0000 (16:16 +0100)
committer	Joar Wandborg <git@wandborg.com>
	Wed, 26 Dec 2012 17:42:37 +0000 (18:42 +0100)
extlib/exif/EXIF.py		patch \| blob \| blame \| history
extlib/exif/changes.txt		patch \| blob \| blame \| history