From: Odin Hørthe Omdal Date: Wed, 26 Dec 2012 15:16:33 +0000 (+0100) Subject: Bug #584, upgrade EXIF.py from master to read Nikon data X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=6e60238b6c182ce60ac1d929f41b1b92ac7d25d7;p=mediagoblin.git Bug #584, upgrade EXIF.py from master to read Nikon data --- diff --git a/extlib/exif/EXIF.py b/extlib/exif/EXIF.py index b777db8f..a188154e 100755 --- a/extlib/exif/EXIF.py +++ b/extlib/exif/EXIF.py @@ -1,10 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Library to extract EXIF information from digital camera image files -# http://sourceforge.net/projects/exif-py/ # -# VERSION 1.0.10 +# Library to extract EXIF information from digital camera image files. +# https://github.com/ianare/exif-py +# +# +# VERSION 1.1.0 # # To use this library call with: # f = open(path_name, 'rb') @@ -22,7 +24,6 @@ # # These 2 are useful when you are retrieving a large list of images # -# # To return an error on invalid tags, # pass the -s or --strict argument, or as # tags = EXIF.process_file(f, strict=True) @@ -48,7 +49,7 @@ # 'EXIF DateTimeOriginal', 'Image Orientation', 'MakerNote FocusMode' # # Copyright (c) 2002-2007 Gene Cash All rights reserved -# Copyright (c) 2007-2008 Ianaré Sévi All rights reserved +# Copyright (c) 2007-2012 Ianaré Sévi All rights reserved # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -102,7 +103,7 @@ def make_string_uc(seq): seq = seq[8:] # Of course, this is only correct if ASCII, and the standard explicitly # allows JIS and Unicode. - return make_string(seq) + return make_string( make_string(seq) ) # field type descriptions as (length, abbreviation, full name) tuples FIELD_TYPES = ( @@ -1248,10 +1249,17 @@ class IFD_Tag: return self.printable def __repr__(self): - return '(0x%04X) %s=%s @ %d' % (self.tag, + try: + s= '(0x%04X) %s=%s @ %d' % (self.tag, FIELD_TYPES[self.field_type][2], self.printable, self.field_offset) + except: + s= '(%s) %s=%s @ %s' % (str(self.tag), + FIELD_TYPES[self.field_type][2], + self.printable, + str(self.field_offset)) + return s # class that handles an EXIF header class EXIF_header: @@ -1369,14 +1377,15 @@ class EXIF_header: # special case: null-terminated ASCII string # XXX investigate # sometimes gets too big to fit in int value - if count != 0 and count < (2**31): - self.file.seek(self.offset + offset) - values = self.file.read(count) - #print values - # Drop any garbage after a null. - values = values.split('\x00', 1)[0] - else: - values = '' + if count != 0: # and count < (2**31): # 2E31 is hardware dependant. --gd + try: + self.file.seek(self.offset + offset) + values = self.file.read(count) + #print values + # Drop any garbage after a null. + values = values.split('\x00', 1)[0] + except OverflowError: + values = '' else: values = [] signed = (field_type in [6, 8, 9, 10]) @@ -1635,21 +1644,118 @@ def process_file(f, stop_tag='UNDEF', details=True, strict=False, debug=False): offset = 0 elif data[0:2] == '\xFF\xD8': # it's a JPEG file + if debug: print "JPEG format recognized data[0:2] == '0xFFD8'." + base = 2 while data[2] == '\xFF' and data[6:10] in ('JFIF', 'JFXX', 'OLYM', 'Phot'): + if debug: print "data[2] == 0xxFF data[3]==%x and data[6:10] = %s"%(ord(data[3]),data[6:10]) length = ord(data[4])*256+ord(data[5]) + if debug: print "Length offset is",length f.read(length-8) # fake an EXIF beginning of file + # I don't think this is used. --gd data = '\xFF\x00'+f.read(10) fake_exif = 1 - if data[2] == '\xFF' and data[6:10] == 'Exif': + if base>2: + if debug: print "added to base " + base = base + length + 4 -2 + else: + if debug: print "added to zero " + base = length + 4 + if debug: print "Set segment base to",base + + # Big ugly patch to deal with APP2 (or other) data coming before APP1 + f.seek(0) + data = f.read(base+4000) # in theory, this could be insufficient since 64K is the maximum size--gd + # base = 2 + while 1: + if debug: print "Segment base 0x%X" % base + if data[base:base+2]=='\xFF\xE1': + # APP1 + if debug: print "APP1 at base",hex(base) + if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3])) + if debug: print "Code",data[base+4:base+8] + if data[base+4:base+8] == "Exif": + if debug: print "Decrement base by",2,"to get to pre-segment header (for compatibility with later code)" + base = base-2 + break + if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2 + base=base+ord(data[base+2])*256+ord(data[base+3])+2 + elif data[base:base+2]=='\xFF\xE0': + # APP0 + if debug: print "APP0 at base",hex(base) + if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3])) + if debug: print "Code",data[base+4:base+8] + if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2 + base=base+ord(data[base+2])*256+ord(data[base+3])+2 + elif data[base:base+2]=='\xFF\xE2': + # APP2 + if debug: print "APP2 at base",hex(base) + if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3])) + if debug: print "Code",data[base+4:base+8] + if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2 + base=base+ord(data[base+2])*256+ord(data[base+3])+2 + elif data[base:base+2]=='\xFF\xEE': + # APP14 + if debug: print "APP14 Adobe segment at base",hex(base) + if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3])) + if debug: print "Code",data[base+4:base+8] + if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2 + print "There is useful EXIF-like data here, but we have no parser for it." + base=base+ord(data[base+2])*256+ord(data[base+3])+2 + elif data[base:base+2]=='\xFF\xDB': + if debug: print "JPEG image data at base",hex(base),"No more segments are expected." + # sys.exit(0) + break + elif data[base:base+2]=='\xFF\xD8': + # APP12 + if debug: print "FFD8 segment at base",hex(base) + if debug: print "Got",hex(ord(data[base])), hex(ord(data[base+1])),"and", data[4+base:10+base], "instead." + if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3])) + if debug: print "Code",data[base+4:base+8] + if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2 + base=base+ord(data[base+2])*256+ord(data[base+3])+2 + elif data[base:base+2]=='\xFF\xEC': + # APP12 + if debug: print "APP12 XMP (Ducky) or Pictureinfo segment at base",hex(base) + if debug: print "Got",hex(ord(data[base])), hex(ord(data[base+1])),"and", data[4+base:10+base], "instead." + if debug: print "Length",hex(ord(data[base+2])), hex(ord(data[base+3])) + if debug: print "Code",data[base+4:base+8] + if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2 + print "There is useful EXIF-like data here (quality, comment, copyright), but we have no parser for it." + base=base+ord(data[base+2])*256+ord(data[base+3])+2 + else: + try: + if debug: print "Unexpected/unhandled segment type or file content." + if debug: print "Got",hex(ord(data[base])), hex(ord(data[base+1])),"and", data[4+base:10+base], "instead." + if debug: print "Increment base by",ord(data[base+2])*256+ord(data[base+3])+2 + except: pass + try: base=base+ord(data[base+2])*256+ord(data[base+3])+2 + except: pass + + f.seek(base+12) + if data[2+base] == '\xFF' and data[6+base:10+base] == 'Exif': # detected EXIF header offset = f.tell() endian = f.read(1) + #HACK TEST: endian = 'M' + elif data[2+base] == '\xFF' and data[6+base:10+base+1] == 'Ducky': + # detected Ducky header. + if debug: print "EXIF-like header (normally 0xFF and code):",hex(ord(data[2+base])) , "and", data[6+base:10+base+1] + offset = f.tell() + endian = f.read(1) + elif data[2+base] == '\xFF' and data[6+base:10+base+1] == 'Adobe': + # detected APP14 (Adobe) + if debug: print "EXIF-like header (normally 0xFF and code):",hex(ord(data[2+base])) , "and", data[6+base:10+base+1] + offset = f.tell() + endian = f.read(1) else: # no EXIF information + if debug: print "No EXIF header expected data[2+base]==0xFF and data[6+base:10+base]===Exif (or Duck)" + if debug: print " but got",hex(ord(data[2+base])) , "and", data[6+base:10+base+1] return {} else: # file format not recognized + if debug: print "file format not recognized" return {} # deal with the EXIF info we found diff --git a/extlib/exif/changes.txt b/extlib/exif/changes.txt index 887d84fd..d1b18e6c 100644 --- a/extlib/exif/changes.txt +++ b/extlib/exif/changes.txt @@ -1,5 +1,10 @@ ~ EXIF.py Changelog ~ +2012-11-30 - Gregory Dudek (date of merge). +Patches and changes: + Overflow error fixes added (related to 2**31 size) + GPS tags added. + 2012-09-26 - Ianaré Sévi Merge patches: Add GPS tags