#!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import print_function """ get_image_size.py ==================== :Name: get_image_size :Purpose: extract image dimensions given a file path :Author: Paulo Scardine (based on code from Emmanuel VAÏSSE) :Created: 26/09/2013 :Copyright: (c) Paulo Scardine 2013 :Licence: MIT """ import collections import json import os import io import struct FILE_UNKNOWN = "Sorry, don't know how to get size for this file." class UnknownImageFormat(Exception): pass types = collections.OrderedDict() BMP = types['BMP'] = 'BMP' GIF = types['GIF'] = 'GIF' ICO = types['ICO'] = 'ICO' JPEG = types['JPEG'] = 'JPEG' PNG = types['PNG'] = 'PNG' TIFF = types['TIFF'] = 'TIFF' image_fields = ['path', 'type', 'file_size', 'width', 'height'] class Image(collections.namedtuple('Image', image_fields)): def to_str_row(self): return ("%d\t%d\t%d\t%s\t%s" % ( self.width, self.height, self.file_size, self.type, self.path.replace('\t', '\\t'), )) def to_str_row_verbose(self): return ("%d\t%d\t%d\t%s\t%s\t##%s" % ( self.width, self.height, self.file_size, self.type, self.path.replace('\t', '\\t'), self)) def to_str_json(self, indent=None): return json.dumps(self._asdict(), indent=indent) def get_image_size(file_path): """ Return (width, height) for a given img file content - no external dependencies except the os and struct builtin modules """ img = get_image_metadata(file_path) return (img.width, img.height) def get_image_size_from_bytesio(input, size): """ Return (width, height) for a given img file content - no external dependencies except the os and struct builtin modules Args: input (io.IOBase): io object support read & seek size (int): size of buffer in byte """ img = get_image_metadata_from_bytesio(input, size) return (img.width, img.height) def get_image_metadata(file_path): """ Return an `Image` object for a given img file content - no external dependencies except the os and struct builtin modules Args: file_path (str): path to an image file Returns: Image: (path, type, file_size, width, height) """ size = os.path.getsize(file_path) # be explicit with open arguments - we need binary mode with io.open(file_path, "rb") as input: return get_image_metadata_from_bytesio(input, size, file_path) def get_image_metadata_from_bytesio(input, size, file_path=None): """ Return an `Image` object for a given img file content - no external dependencies except the os and struct builtin modules Args: input (io.IOBase): io object support read & seek size (int): size of buffer in byte file_path (str): path to an image file Returns: Image: (path, type, file_size, width, height) """ height = -1 width = -1 data = input.read(26) msg = " raised while trying to decode as JPEG." if (size >= 10) and data[:6] in (b'GIF87a', b'GIF89a'): # GIFs imgtype = GIF w, h = struct.unpack("= 24) and data.startswith(b'\211PNG\r\n\032\n') and (data[12:16] == b'IHDR')): # PNGs imgtype = PNG w, h = struct.unpack(">LL", data[16:24]) width = int(w) height = int(h) elif (size >= 16) and data.startswith(b'\211PNG\r\n\032\n'): # older PNGs imgtype = PNG w, h = struct.unpack(">LL", data[8:16]) width = int(w) height = int(h) elif (size >= 2) and data.startswith(b'\377\330'): # JPEG imgtype = JPEG input.seek(0) input.read(2) b = input.read(1) try: while (b and ord(b) != 0xDA): while (ord(b) != 0xFF): b = input.read(1) while (ord(b) == 0xFF): b = input.read(1) if (ord(b) >= 0xC0 and ord(b) <= 0xC3): input.read(3) h, w = struct.unpack(">HH", input.read(4)) break else: input.read( int(struct.unpack(">H", input.read(2))[0]) - 2) b = input.read(1) width = int(w) height = int(h) except struct.error: raise UnknownImageFormat("StructError" + msg) except ValueError: raise UnknownImageFormat("ValueError" + msg) except Exception as e: raise UnknownImageFormat(e.__class__.__name__ + msg) elif (size >= 26) and data.startswith(b'BM'): # BMP imgtype = 'BMP' headersize = struct.unpack("= 40: w, h = struct.unpack("= 8) and data[:4] in (b"II\052\000", b"MM\000\052"): # Standard TIFF, big- or little-endian # BigTIFF and other different but TIFF-like formats are not # supported currently imgtype = TIFF byteOrder = data[:2] boChar = ">" if byteOrder == "MM" else "<" # maps TIFF type id to size (in bytes) # and python format char for struct tiffTypes = { 1: (1, boChar + "B"), # BYTE 2: (1, boChar + "c"), # ASCII 3: (2, boChar + "H"), # SHORT 4: (4, boChar + "L"), # LONG 5: (8, boChar + "LL"), # RATIONAL 6: (1, boChar + "b"), # SBYTE 7: (1, boChar + "c"), # UNDEFINED 8: (2, boChar + "h"), # SSHORT 9: (4, boChar + "l"), # SLONG 10: (8, boChar + "ll"), # SRATIONAL 11: (4, boChar + "f"), # FLOAT 12: (8, boChar + "d") # DOUBLE } ifdOffset = struct.unpack(boChar + "L", data[4:8])[0] try: countSize = 2 input.seek(ifdOffset) ec = input.read(countSize) ifdEntryCount = struct.unpack(boChar + "H", ec)[0] # 2 bytes: TagId + 2 bytes: type + 4 bytes: count of values + 4 # bytes: value offset ifdEntrySize = 12 for i in range(ifdEntryCount): entryOffset = ifdOffset + countSize + i * ifdEntrySize input.seek(entryOffset) tag = input.read(2) tag = struct.unpack(boChar + "H", tag)[0] if(tag == 256 or tag == 257): # if type indicates that value fits into 4 bytes, value # offset is not an offset but value itself type = input.read(2) type = struct.unpack(boChar + "H", type)[0] if type not in tiffTypes: raise UnknownImageFormat( "Unkown TIFF field type:" + str(type)) typeSize = tiffTypes[type][0] typeChar = tiffTypes[type][1] input.seek(entryOffset + 8) value = input.read(typeSize) value = int(struct.unpack(typeChar, value)[0]) if tag == 256: width = value else: height = value if width > -1 and height > -1: break except Exception as e: raise UnknownImageFormat(str(e)) elif size >= 2: # see http://en.wikipedia.org/wiki/ICO_(file_format) imgtype = 'ICO' input.seek(0) reserved = input.read(2) if 0 != struct.unpack(" 1: import warnings warnings.warn("ICO File contains more than one image") # http://msdn.microsoft.com/en-us/library/ms997538.aspx w = input.read(1) h = input.read(1) width = ord(w) height = ord(h) else: raise UnknownImageFormat(FILE_UNKNOWN) return Image(path=file_path, type=imgtype, file_size=size, width=width, height=height) import unittest class Test_get_image_size(unittest.TestCase): data = [{ 'path': 'lookmanodeps.png', 'width': 251, 'height': 208, 'file_size': 22228, 'type': 'PNG'}] def setUp(self): pass def test_get_image_size_from_bytesio(self): img = self.data[0] p = img['path'] with io.open(p, 'rb') as fp: b = fp.read() fp = io.BytesIO(b) sz = len(b) output = get_image_size_from_bytesio(fp, sz) self.assertTrue(output) self.assertEqual(output, (img['width'], img['height'])) def test_get_image_metadata_from_bytesio(self): img = self.data[0] p = img['path'] with io.open(p, 'rb') as fp: b = fp.read() fp = io.BytesIO(b) sz = len(b) output = get_image_metadata_from_bytesio(fp, sz) self.assertTrue(output) for field in image_fields: self.assertEqual(getattr(output, field), None if field == 'path' else img[field]) def test_get_image_metadata(self): img = self.data[0] output = get_image_metadata(img['path']) self.assertTrue(output) for field in image_fields: self.assertEqual(getattr(output, field), img[field]) def test_get_image_metadata__ENOENT_OSError(self): with self.assertRaises(OSError): get_image_metadata('THIS_DOES_NOT_EXIST') def test_get_image_metadata__not_an_image_UnknownImageFormat(self): with self.assertRaises(UnknownImageFormat): get_image_metadata('README.rst') def test_get_image_size(self): img = self.data[0] output = get_image_size(img['path']) self.assertTrue(output) self.assertEqual(output, (img['width'], img['height'])) def tearDown(self): pass def main(argv=None): """ Print image metadata fields for the given file path. Keyword Arguments: argv (list): commandline arguments (e.g. sys.argv[1:]) Returns: int: zero for OK """ import logging import optparse import sys prs = optparse.OptionParser( usage="%prog [-v|--verbose] [--json|--json-indent] []", description="Print metadata for the given image paths " "(without image library bindings).") prs.add_option('--json', dest='json', action='store_true') prs.add_option('--json-indent', dest='json_indent', action='store_true') prs.add_option('-v', '--verbose', dest='verbose', action='store_true',) prs.add_option('-q', '--quiet', dest='quiet', action='store_true',) prs.add_option('-t', '--test', dest='run_tests', action='store_true',) argv = list(argv) if argv is not None else sys.argv[1:] (opts, args) = prs.parse_args(args=argv) loglevel = logging.INFO if opts.verbose: loglevel = logging.DEBUG elif opts.quiet: loglevel = logging.ERROR logging.basicConfig(level=loglevel) log = logging.getLogger() log.debug('argv: %r', argv) log.debug('opts: %r', opts) log.debug('args: %r', args) if opts.run_tests: import sys sys.argv = [sys.argv[0]] + args import unittest return unittest.main() output_func = Image.to_str_row if opts.json_indent: import functools output_func = functools.partial(Image.to_str_json, indent=2) elif opts.json: output_func = Image.to_str_json elif opts.verbose: output_func = Image.to_str_row_verbose EX_OK = 0 EX_NOT_OK = 2 if len(args) < 1: prs.print_help() print('') prs.error("You must specify one or more paths to image files") errors = [] for path_arg in args: try: img = get_image_metadata(path_arg) print(output_func(img)) except KeyboardInterrupt: raise except OSError as e: log.error((path_arg, e)) errors.append((path_arg, e)) except Exception as e: log.exception(e) errors.append((path_arg, e)) pass if len(errors): import pprint print("ERRORS", file=sys.stderr) print("======", file=sys.stderr) print(pprint.pformat(errors, indent=2), file=sys.stderr) return EX_NOT_OK return EX_OK if __name__ == "__main__": import sys sys.exit(main(argv=sys.argv[1:]))