{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Check file sizes\n",
    "\n",
    "Some of the image files seem to be JPEG-compressed TIFFs and are much smaller than the rest. This notebook just checks the file sizes to look for anomalies."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Folder whose immediate sub-directories hold the image files to check.\n",
    "IMAGE_ROOT = '/webdav/Shared/ANU-Library/Sydney Stock Exchange 1901-1950'\n",
    "\n",
    "# Size threshold in bytes; any file smaller than this is reported.\n",
    "MIN_FILE_SIZE = 50_000_000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Find undersized files\n",
    "\n",
    "Look inside each sub-directory of `IMAGE_ROOT` and print the name of every file smaller than `MIN_FILE_SIZE`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_small_files(root, min_size):\n",
    "    \"\"\"Yield the path of every file smaller than `min_size` bytes.\n",
    "\n",
    "    Only regular files located directly inside an immediate sub-directory\n",
    "    of `root` are examined; files sitting in `root` itself and anything\n",
    "    nested deeper are ignored.\n",
    "\n",
    "    Directories and files are visited in sorted order because os.listdir\n",
    "    returns entries in arbitrary order, which would make the report differ\n",
    "    from run to run.\n",
    "    \"\"\"\n",
    "    for dir_name in sorted(os.listdir(root)):\n",
    "        dir_path = os.path.join(root, dir_name)\n",
    "        if not os.path.isdir(dir_path):\n",
    "            continue\n",
    "        for file_name in sorted(os.listdir(dir_path)):\n",
    "            file_path = os.path.join(dir_path, file_name)\n",
    "            if os.path.isfile(file_path) and os.path.getsize(file_path) < min_size:\n",
    "                yield file_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "N193-055_0037.tiff\n",
      "N193-100_0318.tif\n",
      "N193-103_0352.tif\n",
      "N193-107_0351.tif\n",
      "N193-163_0415.tif\n",
      "N193-163_0427.tif\n",
      "N193-199_0005.tif\n"
     ]
    }
   ],
   "source": [
    "for small_file in find_small_files(IMAGE_ROOT, MIN_FILE_SIZE):\n",
    "    print(os.path.basename(small_file))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}