263 lines
11 KiB
Python
263 lines
11 KiB
Python
"""
|
|
Read Exif metadata from TIFF and JPEG files.
|
|
"""
|
|
|
|
from .exif_log import get_logger
|
|
from .classes import *
|
|
from .tags import *
|
|
from .utils import ord_
|
|
|
|
# Version string of this package.
__version__ = '2.1.2'

# Module-level logger shared by the functions below; obtained from the
# package's own exif_log.get_logger() helper.
logger = get_logger()
|
|
|
|
|
|
def increment_base(data, base):
    """
    Return how far to advance ``base`` to step over the segment found there.

    The two bytes at ``data[base + 2]`` and ``data[base + 3]`` are combined
    as a big-endian 16-bit value, and 2 is added to the result.

    Raises IndexError when ``data`` is too short to hold those two bytes.
    """
    # ord_ comes from .utils; presumably it normalises a single byte to an
    # int on both Python 2 and 3 -- confirm against that module.
    high_byte = ord_(data[base + 2])
    low_byte = ord_(data[base + 3])
    segment_length = high_byte * 256 + low_byte
    return segment_length + 2
|
|
|
|
|
|
def _locate_jpeg_exif(f, data):
    """
    Scan the segments of a JPEG file for an EXIF-like header.

    :param f: open binary file object, positioned as left by the caller's
        initial 12-byte read. It is rewound and re-read here, and on
        success is left one byte past the returned offset.
    :param data: the first 12 bytes of the file (starting with FF D8).
    :return: ``(offset, endian, fake_exif)`` when an Exif, Ducky or Adobe
        header is found, where ``endian`` is the single raw byte read at
        ``offset`` (possibly empty at end of file); ``None`` when the
        segment scan ends without such a header.
    :raises IndexError: when the buffered data is too short for the bytes
        the scan needs to inspect (truncated or unexpected content).
    """
    # by default do not fake an EXIF beginning
    fake_exif = 0

    logger.debug("JPEG format recognized data[0:2]=0x%X%X", ord_(data[0]), ord_(data[1]))
    base = 2
    logger.debug("data[2]=0x%X data[3]=0x%X data[6:10]=%s",
                 ord_(data[2]), ord_(data[3]), data[6:10])
    while ord_(data[2]) == 0xFF and data[6:10] in (b'JFIF', b'JFXX', b'OLYM', b'Phot'):
        length = ord_(data[4]) * 256 + ord_(data[5])
        logger.debug(" Length offset is %s", length)
        f.read(length - 8)
        # fake an EXIF beginning of file
        # I don't think this is used. --gd
        data = b'\xFF\x00' + f.read(10)
        fake_exif = 1
        if base > 2:
            logger.debug(" Added to base")
            base = base + length + 4 - 2
        else:
            logger.debug(" Added to zero")
            base = length + 4
        logger.debug(" Set segment base to 0x%X", base)

    # Big ugly patch to deal with APP2 (or other) data coming before APP1
    f.seek(0)
    # in theory, this could be insufficient since 64K is the maximum size--gd
    data = f.read(base + 4000)
    while True:
        logger.debug(" Segment base 0x%X", base)
        marker = data[base:base + 2]
        if marker == b'\xFF\xE1':
            # APP1
            logger.debug(" APP1 at base 0x%X", base)
            logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]),
                         ord_(data[base + 3]))
            logger.debug(" Code: %s", data[base + 4:base + 8])
            if data[base + 4:base + 8] == b"Exif":
                logger.debug(" Decrement base by 2 to get to pre-segment header (for compatibility with later code)")
                base -= 2
                break
            increment = increment_base(data, base)
            logger.debug(" Increment base by %s", increment)
            base += increment
        elif marker == b'\xFF\xE0':
            # APP0
            logger.debug(" APP0 at base 0x%X", base)
            logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]),
                         ord_(data[base + 3]))
            logger.debug(" Code: %s", data[base + 4:base + 8])
            increment = increment_base(data, base)
            logger.debug(" Increment base by %s", increment)
            base += increment
        elif marker == b'\xFF\xE2':
            # APP2
            logger.debug(" APP2 at base 0x%X", base)
            logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]),
                         ord_(data[base + 3]))
            logger.debug(" Code: %s", data[base + 4:base + 8])
            increment = increment_base(data, base)
            logger.debug(" Increment base by %s", increment)
            base += increment
        elif marker == b'\xFF\xEE':
            # APP14
            logger.debug(" APP14 Adobe segment at base 0x%X", base)
            logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]),
                         ord_(data[base + 3]))
            logger.debug(" Code: %s", data[base + 4:base + 8])
            increment = increment_base(data, base)
            logger.debug(" Increment base by %s", increment)
            base += increment
            logger.debug(" There is useful EXIF-like data here, but we have no parser for it.")
        elif marker == b'\xFF\xDB':
            logger.debug(" JPEG image data at base 0x%X No more segments are expected.",
                         base)
            break
        elif marker == b'\xFF\xD8':
            # a second FFD8 marker found while scanning; step over it
            logger.debug(" FFD8 segment at base 0x%X", base)
            logger.debug(" Got 0x%X 0x%X and %s instead",
                         ord_(data[base]),
                         ord_(data[base + 1]),
                         data[4 + base:10 + base])
            logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]),
                         ord_(data[base + 3]))
            logger.debug(" Code: %s", data[base + 4:base + 8])
            increment = increment_base(data, base)
            logger.debug(" Increment base by %s", increment)
            base += increment
        elif marker == b'\xFF\xEC':
            # APP12
            logger.debug(" APP12 XMP (Ducky) or Pictureinfo segment at base 0x%X",
                         base)
            logger.debug(" Got 0x%X and 0x%X instead", ord_(data[base]),
                         ord_(data[base + 1]))
            logger.debug(" Length: 0x%X 0x%X",
                         ord_(data[base + 2]),
                         ord_(data[base + 3]))
            logger.debug("Code: %s", data[base + 4:base + 8])
            increment = increment_base(data, base)
            logger.debug(" Increment base by %s", increment)
            base += increment
            logger.debug(
                " There is useful EXIF-like data here (quality, comment, copyright), but we have no parser for it.")
        else:
            # Unknown marker: try to step over it. Running off the end of
            # the buffered data raises IndexError, which the caller treats
            # as "no EXIF data".
            increment = increment_base(data, base)
            logger.debug(" Got 0x%X and 0x%X instead",
                         ord_(data[base]),
                         ord_(data[base + 1]))
            logger.debug(" Increment base by %s", increment)
            base += increment

    f.seek(base + 12)
    if ord_(data[2 + base]) == 0xFF and data[6 + base:10 + base] == b'Exif':
        # detected EXIF header
        offset = f.tell()
        endian = f.read(1)
    elif ord_(data[2 + base]) == 0xFF and data[6 + base:10 + base + 1] == b'Ducky':
        # detected Ducky header.
        logger.debug("EXIF-like header (normally 0xFF and code): 0x%X and %s",
                     ord_(data[2 + base]), data[6 + base:10 + base + 1])
        offset = f.tell()
        endian = f.read(1)
    elif ord_(data[2 + base]) == 0xFF and data[6 + base:10 + base + 1] == b'Adobe':
        # detected APP14 (Adobe)
        logger.debug("EXIF-like header (normally 0xFF and code): 0x%X and %s",
                     ord_(data[2 + base]), data[6 + base:10 + base + 1])
        offset = f.tell()
        endian = f.read(1)
    else:
        # no EXIF information
        logger.debug("No EXIF header expected data[2+base]==0xFF and data[6+base:10+base]===Exif (or Duck)")
        logger.debug("Did get 0x%X and %s",
                     ord_(data[2 + base]), data[6 + base:10 + base + 1])
        return None

    return offset, endian, fake_exif


def process_file(f, stop_tag=DEFAULT_STOP_TAG, details=True, strict=False, debug=False, truncate_tags=True):
    """
    Process an image file (expects an open file object).

    This is the function that has to deal with all the arbitrary nasty bits
    of the EXIF standard.

    :param f: open file object; its content is compared against bytes
        literals, so it must be opened in binary mode.
    :param stop_tag: forwarded to ``ExifHeader.dump_ifd`` as ``stop_tag``.
    :param details: when true, MakerNote decoding and thumbnail extraction
        are attempted; also forwarded to ``ExifHeader``.
    :param strict: forwarded to ``ExifHeader`` unchanged.
    :param debug: forwarded to ``ExifHeader``; together with ``details`` it
        also enables the experimental XMP search.
    :param truncate_tags: forwarded to ``ExifHeader`` unchanged.
    :return: the ``tags`` mapping of the ``ExifHeader`` that was built, or
        an empty dict when the file is not a recognised TIFF/JPEG, carries
        no EXIF-like header, or is too short to be scanned.
    """

    # by default do not fake an EXIF beginning
    fake_exif = 0

    # determine whether it's a JPEG or TIFF
    data = f.read(12)
    if data[0:4] in [b'II*\x00', b'MM\x00*']:
        # it's a TIFF file
        logger.debug("TIFF format recognized in data[0:4]")
        f.seek(0)
        endian = f.read(1)
        f.read(1)
        offset = 0
    elif data[0:2] == b'\xFF\xD8':
        # it's a JPEG file
        try:
            located = _locate_jpeg_exif(f, data)
        except IndexError:
            # The scan indexed past the bytes that were read: the file is
            # truncated or not laid out as expected. Report "no EXIF data"
            # rather than crashing.
            logger.debug(" Unexpected/unhandled segment type or file content.")
            return {}
        if located is None:
            return {}
        offset, endian, fake_exif = located
    else:
        # file format not recognized
        logger.debug("File format not recognized.")
        return {}

    if not endian:
        # f.read(1) hit end of file, so there is no byte-order marker
        logger.debug("File ends before the byte-order marker.")
        return {}

    endian = chr(ord_(endian[0]))
    # deal with the EXIF info we found
    # The description is only for the log; .get() keeps an unexpected marker
    # byte from raising KeyError here, since logging arguments are evaluated
    # even when debug output is disabled.
    logger.debug("Endian format is %s (%s)", endian, {
        'I': 'Intel',
        'M': 'Motorola',
        '\x01': 'Adobe Ducky',
        'd': 'XMP/Adobe unknown'
    }.get(endian, 'Unknown'))

    hdr = ExifHeader(f, endian, offset, fake_exif, strict, debug, details, truncate_tags)
    ifd_list = hdr.list_ifd()
    thumb_ifd = False
    for ctr, ifd in enumerate(ifd_list):
        if ctr == 0:
            ifd_name = 'Image'
        elif ctr == 1:
            ifd_name = 'Thumbnail'
            thumb_ifd = ifd
        else:
            ifd_name = 'IFD %d' % ctr
        logger.debug('IFD %d (%s) at offset %s:', ctr, ifd_name, ifd)
        hdr.dump_ifd(ifd, ifd_name, stop_tag=stop_tag)
    # EXIF IFD
    exif_off = hdr.tags.get('Image ExifOffset')
    if exif_off:
        logger.debug('Exif SubIFD at offset %s:', exif_off.values[0])
        hdr.dump_ifd(exif_off.values[0], 'EXIF', stop_tag=stop_tag)

    # deal with MakerNote contained in EXIF IFD
    # (Some apps use MakerNote tags but do not use a format for which we
    # have a description, do not process these).
    if details and 'EXIF MakerNote' in hdr.tags and 'Image Make' in hdr.tags:
        hdr.decode_maker_note()

    # extract thumbnails
    if details and thumb_ifd:
        hdr.extract_tiff_thumbnail(thumb_ifd)
        hdr.extract_jpeg_thumbnail()

    # parse XMP tags (experimental)
    if debug and details:
        xmp_string = b''
        # Easy we already have them
        if 'Image ApplicationNotes' in hdr.tags:
            logger.debug('XMP present in Exif')
            xmp_string = make_string(hdr.tags['Image ApplicationNotes'].values)
        # We need to look in the entire file for the XML
        else:
            logger.debug('XMP not in Exif, searching file for XMP info...')
            xml_started = False
            xml_finished = False
            for line in f:
                open_tag = line.find(b'<x:xmpmeta')
                close_tag = line.find(b'</x:xmpmeta>')

                if open_tag != -1:
                    xml_started = True
                    line = line[open_tag:]
                    logger.debug('XMP found opening tag at line position %s' % open_tag)

                if close_tag != -1:
                    logger.debug('XMP found closing tag at line position %s' % close_tag)
                    # close_tag was measured on the unsliced line; shift it
                    # if the opening tag was cut off the front above
                    line_offset = 0
                    if open_tag != -1:
                        line_offset = open_tag
                    # 12 == len(b'</x:xmpmeta>'): keep the closing tag itself
                    line = line[:(close_tag - line_offset) + 12]
                    xml_finished = True

                if xml_started:
                    xmp_string += line

                if xml_finished:
                    break

            logger.debug('XMP Finished searching for info')
        if xmp_string:
            hdr.parse_xmp(xmp_string)

    return hdr.tags
|