This repository has been archived on 2018-05-13. You can view files and clone it, but cannot push or open issues or pull requests.
moviespy/movie_spy/exifread/__init__.py

263 lines
11 KiB
Python

"""
Read Exif metadata from tiff and jpeg files.
"""
from .exif_log import get_logger
from .classes import *
from .tags import *
from .utils import ord_
# Version string of this package.
__version__ = '2.1.2'
# Module-level logger obtained from exif_log.get_logger(); process_file()
# below uses it for its debug trace of the parsing steps.
logger = get_logger()
def increment_base(data, base):
    """
    Return how far to advance `base` to step over the segment found there.

    The bytes at data[base + 2] and data[base + 3] are combined as a
    big-endian 16-bit value (high byte first) and 2 is added to the result.
    process_file() matches a two-byte marker at data[base:base + 2] and then
    adds this value to `base`, so the extra 2 covers those marker bytes.

    Raises IndexError if `data` has no bytes at base + 2 / base + 3.
    """
    high_byte = ord_(data[base + 2])
    low_byte = ord_(data[base + 3])
    segment_length = high_byte * 256 + low_byte
    return segment_length + 2
def process_file(f, stop_tag=DEFAULT_STOP_TAG, details=True, strict=False, debug=False, truncate_tags=True):
    """
    Process an image file (expects an open file object).

    This is the function that has to deal with all the arbitrary nasty bits
    of the EXIF standard.

    The first bytes of the file decide whether it is handled as TIFF or as
    JPEG. For a JPEG the leading segments are walked until an APP1 "Exif"
    segment (or the start of the image data) is reached. The header that was
    located is handed to ExifHeader, whose IFDs are then dumped into a tag
    dictionary.

    :param f: open file object positioned at the start of the image. It is
        used through read()/seek()/tell() and, for the XMP search, iterated
        line by line. Its content is compared against bytes literals.
    :param stop_tag: forwarded to ExifHeader.dump_ifd() for every IFD.
    :param details: when true, the MakerNote is decoded and thumbnails are
        extracted; together with `debug` it also enables the XMP search.
    :param strict: forwarded to ExifHeader.
    :param debug: forwarded to ExifHeader; together with `details` it enables
        the (experimental) XMP parsing.
    :param truncate_tags: forwarded to ExifHeader.
    :return: the ExifHeader's `tags` dictionary, or an empty dict when the
        file format is not recognized, no EXIF-like header is found, the
        byte-order marker is missing or unknown, or the file is too short to
        contain the header being read.
    """
    # by default do not fake an EXIF beginning
    fake_exif = 0

    # determine whether it's a JPEG or TIFF
    data = f.read(12)
    if data[0:4] in [b'II*\x00', b'MM\x00*']:
        # it's a TIFF file
        logger.debug("TIFF format recognized in data[0:4]")
        f.seek(0)
        endian = f.read(1)
        f.read(1)
        offset = 0
    elif data[0:2] == b'\xFF\xD8':
        # it's a JPEG file
        logger.debug("JPEG format recognized data[0:2]=0x%X%X", ord_(data[0]), ord_(data[1]))
        if len(data) < 4:
            # Nothing usable follows the start-of-image marker; indexing
            # data[2] / data[3] below would raise IndexError.
            logger.debug(" Unexpected/unhandled segment type or file content.")
            return {}
        base = 2
        logger.debug("data[2]=0x%X data[3]=0x%X data[6:10]=%s",
                     ord_(data[2]), ord_(data[3]), data[6:10])
        # `data` is re-read inside the loop and may come back short at end of
        # file, hence the explicit length check before indexing data[2].
        while (len(data) > 2 and ord_(data[2]) == 0xFF
               and data[6:10] in (b'JFIF', b'JFXX', b'OLYM', b'Phot')):
            length = ord_(data[4]) * 256 + ord_(data[5])
            logger.debug(" Length offset is %s", length)
            f.read(length - 8)
            # fake an EXIF beginning of file
            # I don't think this is used. --gd
            data = b'\xFF\x00' + f.read(10)
            fake_exif = 1
            if base > 2:
                logger.debug(" Added to base")
                base = base + length + 4 - 2
            else:
                logger.debug(" Added to zero")
                base = length + 4
            logger.debug(" Set segment base to 0x%X", base)

        # Big ugly patch to deal with APP2 (or other) data coming before APP1
        f.seek(0)
        # in theory, this could be insufficient since 64K is the maximum size--gd
        data = f.read(base + 4000)
        # base = 2
        while 1:
            logger.debug(" Segment base 0x%X", base)
            if len(data) < base + 4:
                # Every branch below reads the marker at data[base:base + 2]
                # and the length bytes at base + 2 / base + 3. If they are
                # not all present the buffer ended mid-segment: report
                # "no EXIF" instead of raising IndexError.
                logger.debug(" Unexpected/unhandled segment type or file content.")
                return {}
            if data[base:base + 2] == b'\xFF\xE1':
                # APP1
                logger.debug(" APP1 at base 0x%X", base)
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]),
                             ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4:base + 8])
                if data[base + 4:base + 8] == b"Exif":
                    logger.debug(" Decrement base by 2 to get to pre-segment header (for compatibility with later code)")
                    base -= 2
                    break
                increment = increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif data[base:base + 2] == b'\xFF\xE0':
                # APP0
                logger.debug(" APP0 at base 0x%X", base)
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]),
                             ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4:base + 8])
                increment = increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif data[base:base + 2] == b'\xFF\xE2':
                # APP2
                logger.debug(" APP2 at base 0x%X", base)
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]),
                             ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4:base + 8])
                increment = increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif data[base:base + 2] == b'\xFF\xEE':
                # APP14
                logger.debug(" APP14 Adobe segment at base 0x%X", base)
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]),
                             ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4:base + 8])
                increment = increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
                logger.debug(" There is useful EXIF-like data here, but we have no parser for it.")
            elif data[base:base + 2] == b'\xFF\xDB':
                logger.debug(" JPEG image data at base 0x%X No more segments are expected.",
                             base)
                break
            elif data[base:base + 2] == b'\xFF\xD8':
                # FFD8 marker found again at a segment boundary; it is
                # stepped over like any other segment.
                logger.debug(" FFD8 segment at base 0x%X", base)
                logger.debug(" Got 0x%X 0x%X and %s instead",
                             ord_(data[base]),
                             ord_(data[base + 1]),
                             data[4 + base:10 + base])
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]),
                             ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4:base + 8])
                increment = increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif data[base:base + 2] == b'\xFF\xEC':
                # APP12
                logger.debug(" APP12 XMP (Ducky) or Pictureinfo segment at base 0x%X",
                             base)
                logger.debug(" Got 0x%X and 0x%X instead", ord_(data[base]),
                             ord_(data[base + 1]))
                logger.debug(" Length: 0x%X 0x%X",
                             ord_(data[base + 2]),
                             ord_(data[base + 3]))
                logger.debug("Code: %s", data[base + 4:base + 8])
                increment = increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
                logger.debug(
                    " There is useful EXIF-like data here (quality, comment, copyright), but we have no parser for it.")
            else:
                try:
                    increment = increment_base(data, base)
                    logger.debug(" Got 0x%X and 0x%X instead",
                                 ord_(data[base]),
                                 ord_(data[base + 1]))
                except IndexError:
                    logger.debug(" Unexpected/unhandled segment type or file content.")
                    return {}
                else:
                    logger.debug(" Increment base by %s", increment)
                    base += increment

        f.seek(base + 12)
        # data[2 + base] is guaranteed to exist here: either the loop broke on
        # an "Exif" APP1 (base was moved back by 2) or on FFDB with at least
        # four bytes available from base.
        header_byte = ord_(data[2 + base])
        header_code = data[6 + base:10 + base + 1]
        if header_byte == 0xFF and header_code[:4] == b'Exif':
            # detected EXIF header
            offset = f.tell()
            endian = f.read(1)
            #HACK TEST: endian = 'M'
        elif header_byte == 0xFF and header_code == b'Ducky':
            # detected Ducky header.
            logger.debug("EXIF-like header (normally 0xFF and code): 0x%X and %s",
                         header_byte, header_code)
            offset = f.tell()
            endian = f.read(1)
        elif header_byte == 0xFF and header_code == b'Adobe':
            # detected APP14 (Adobe)
            logger.debug("EXIF-like header (normally 0xFF and code): 0x%X and %s",
                         header_byte, header_code)
            offset = f.tell()
            endian = f.read(1)
        else:
            # no EXIF information
            logger.debug("No EXIF header expected data[2+base]==0xFF and data[6+base:10+base]===Exif (or Duck)")
            logger.debug("Did get 0x%X and %s",
                         header_byte, header_code)
            return {}
    else:
        # file format not recognized
        logger.debug("File format not recognized.")
        return {}

    if not endian:
        # The read for the byte-order marker hit end of file; endian[0]
        # would raise IndexError.
        logger.debug("No endian marker found (file truncated).")
        return {}

    endian = chr(ord_(endian[0]))
    endian_names = {
        'I': 'Intel',
        'M': 'Motorola',
        '\x01': 'Adobe Ducky',
        'd': 'XMP/Adobe unknown'
    }
    if endian not in endian_names:
        # Previously this surfaced as a KeyError raised from inside the debug
        # log call below; an unknown marker is treated as "no EXIF found".
        logger.debug("Unknown endian format %r", endian)
        return {}

    # deal with the EXIF info we found
    logger.debug("Endian format is %s (%s)", endian, endian_names[endian])
    hdr = ExifHeader(f, endian, offset, fake_exif, strict, debug, details, truncate_tags)
    ifd_list = hdr.list_ifd()
    thumb_ifd = False
    for ctr, ifd in enumerate(ifd_list):
        if ctr == 0:
            ifd_name = 'Image'
        elif ctr == 1:
            ifd_name = 'Thumbnail'
            thumb_ifd = ifd
        else:
            ifd_name = 'IFD %d' % ctr
        logger.debug('IFD %d (%s) at offset %s:', ctr, ifd_name, ifd)
        hdr.dump_ifd(ifd, ifd_name, stop_tag=stop_tag)

    # EXIF IFD
    exif_off = hdr.tags.get('Image ExifOffset')
    if exif_off:
        logger.debug('Exif SubIFD at offset %s:', exif_off.values[0])
        hdr.dump_ifd(exif_off.values[0], 'EXIF', stop_tag=stop_tag)

    # deal with MakerNote contained in EXIF IFD
    # (Some apps use MakerNote tags but do not use a format for which we
    # have a description, do not process these).
    if details and 'EXIF MakerNote' in hdr.tags and 'Image Make' in hdr.tags:
        hdr.decode_maker_note()

    # extract thumbnails
    if details and thumb_ifd:
        hdr.extract_tiff_thumbnail(thumb_ifd)
        hdr.extract_jpeg_thumbnail()

    # parse XMP tags (experimental)
    if debug and details:
        xmp_string = b''
        # Easy we already have them
        if 'Image ApplicationNotes' in hdr.tags:
            logger.debug('XMP present in Exif')
            xmp_string = make_string(hdr.tags['Image ApplicationNotes'].values)
        # We need to look in the entire file for the XML
        else:
            logger.debug('XMP not in Exif, searching file for XMP info...')
            xml_started = False
            xml_finished = False
            for line in f:
                open_tag = line.find(b'<x:xmpmeta')
                close_tag = line.find(b'</x:xmpmeta>')

                if open_tag != -1:
                    xml_started = True
                    line = line[open_tag:]
                    logger.debug('XMP found opening tag at line position %s', open_tag)

                if close_tag != -1:
                    logger.debug('XMP found closing tag at line position %s', close_tag)
                    # close_tag was measured on the untrimmed line; subtract
                    # what was cut off the front when the opening tag is on
                    # this same line. 12 == len(b'</x:xmpmeta>').
                    line_offset = 0
                    if open_tag != -1:
                        line_offset = open_tag
                    line = line[:(close_tag - line_offset) + 12]
                    xml_finished = True

                if xml_started:
                    xmp_string += line

                if xml_finished:
                    break

            logger.debug('XMP Finished searching for info')
        if xmp_string:
            hdr.parse_xmp(xmp_string)

    return hdr.tags