{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Classifying all the Tribune images\n", "\n", "Using the simple model we created in [this notebook](Training-a-classification-model-for-the-Tribune.ipynb), let's attempt to classify all the images in the Tribune collection." ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "from __future__ import absolute_import\n", "from __future__ import division\n", "from __future__ import print_function\n", "\n", "import pandas as pd\n", "import os\n", "from urllib.parse import urlparse\n", "import requests\n", "from IPython.display import display, HTML\n", "import copy\n", "from tqdm import tqdm_notebook\n", "\n", "\n", "import sys\n", "import time\n", "\n", "import numpy as np\n", "import tensorflow as tf\n", "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# http://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License.\n", "# ==============================================================================\n", "\n", "def load_graph(model_file):\n", " graph = tf.Graph()\n", " graph_def = tf.GraphDef()\n", "\n", " with open(model_file, \"rb\") as f:\n", " graph_def.ParseFromString(f.read())\n", " with graph.as_default():\n", " tf.import_graph_def(graph_def)\n", "\n", " return graph\n", "\n", "def read_tensor_from_image_file(file_name, input_height=299, input_width=299,\n", "\t\t\t\tinput_mean=0, input_std=255):\n", " input_name = \"file_reader\"\n", " output_name = \"normalized\"\n", " file_reader = tf.read_file(file_name, input_name)\n", " if file_name.endswith(\".png\"):\n", " image_reader = tf.image.decode_png(file_reader, channels = 3,\n", " name='png_reader')\n", " elif file_name.endswith(\".gif\"):\n", " image_reader = tf.squeeze(tf.image.decode_gif(file_reader,\n", " name='gif_reader'))\n", " elif file_name.endswith(\".bmp\"):\n", " image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')\n", " else:\n", " image_reader = tf.image.decode_jpeg(file_reader, channels = 3,\n", " name='jpeg_reader')\n", " float_caster = tf.cast(image_reader, tf.float32)\n", " dims_expander = tf.expand_dims(float_caster, 0);\n", " resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])\n", " normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])\n", " sess = tf.Session()\n", " result = sess.run(normalized)\n", "\n", " return result\n", "\n", "def load_labels(label_file):\n", " label = []\n", " proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()\n", " for l in proto_as_ascii_lines:\n", " label.append(l.rstrip())\n", " return label\n", "\n" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "def label_tribune_image(file_name):\n", " model_file = \"tensorflow-for-poets-2/tf_files/tribune_graph.pb\"\n", " label_file = 
\"tensorflow-for-poets-2/tf_files/tribune_labels.txt\"\n", " input_height = 224\n", " input_width = 224\n", " input_mean = 128\n", " input_std = 128\n", " input_layer = \"input\"\n", " output_layer = \"final_result\"\n", " graph = load_graph(model_file)\n", " t = read_tensor_from_image_file(file_name,\n", " input_height=input_height,\n", " input_width=input_width,\n", " input_mean=input_mean,\n", " input_std=input_std)\n", "\n", " input_name = \"import/\" + input_layer\n", " output_name = \"import/\" + output_layer\n", " input_operation = graph.get_operation_by_name(input_name);\n", " output_operation = graph.get_operation_by_name(output_name);\n", "\n", " with tf.Session(graph=graph) as sess:\n", " start = time.time()\n", " results = sess.run(output_operation.outputs[0],\n", " {input_operation.outputs[0]: t})\n", " end=time.time()\n", " results = np.squeeze(results)\n", "\n", " top_k = results.argsort()[-5:][::-1]\n", " labels = load_labels(label_file)\n", " scores = {}\n", " for i in top_k:\n", " scores[labels[i]] = results[i]\n", " return scores\n", "\n", "\n", "def detect_all():\n", " '''\n", " I've already got copies of all the images, so I'll just point the script at them.\n", " '''\n", " face_data = []\n", " image_dir = '/Volumes/bigdata/mydata/SLNSW/Tribune/images/500'\n", " images = [i for i in os.listdir(image_dir) if i[-4:] == '.jpg']\n", " for image in tqdm_notebook(images):\n", " img_file = os.path.join(image_dir, image)\n", " scores = label_tribune_image(img_file)\n", " scores['image'] = image.replace('-500.jpg', '')\n", " results.append(scores)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "results = []\n", "detect_all()" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(results)" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
imageportraitsprotests
0FL18172972.656395e-091.000000
1FL18173007.659095e-030.992341
2FL18173034.747240e-050.999953
3FL18173071.007780e-040.999899
4FL18173101.188131e-101.000000
\n", "
" ], "text/plain": [ " image portraits protests\n", "0 FL1817297 2.656395e-09 1.000000\n", "1 FL1817300 7.659095e-03 0.992341\n", "2 FL1817303 4.747240e-05 0.999953\n", "3 FL1817307 1.007780e-04 0.999899\n", "4 FL1817310 1.188131e-10 1.000000" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
imageportraitsprotests
0FL18172970.000001.00000
1FL18173000.007660.99234
2FL18173030.000050.99995
3FL18173070.000100.99990
4FL18173100.000001.00000
5FL18173110.977230.02277
6FL18173120.972150.02785
7FL18173130.000290.99971
8FL18173170.998690.00131
9FL18173201.000000.00000
10FL18173230.000720.99928
11FL18173240.028860.97114
12FL18173250.999990.00001
13FL18173281.000000.00000
14FL18173300.951030.04897
15FL18173351.000000.00000
16FL18173360.996180.00382
17FL18173370.993820.00618
18FL18173420.982160.01784
19FL18173440.996740.00326
20FL18173460.139090.86091
21FL18173480.911260.08874
22FL18173490.000001.00000
23FL18173500.000001.00000
24FL18173510.000001.00000
25FL18173520.000001.00000
26FL18173560.000050.99995
27FL18173590.994670.00533
28FL18173600.000001.00000
29FL18173620.080090.91991
............
18487FL44644220.999920.00008
18488FL44644250.001200.99880
18489FL44644280.007520.99248
18490FL44644300.000001.00000
18491FL44644331.000000.00000
18492FL44644350.001200.99880
18493FL44644360.546980.45302
18494FL44644380.954470.04553
18495FL44644400.000001.00000
18496FL44644410.000020.99998
18497FL44644420.000001.00000
18498FL44644440.000010.99999
18499FL44644470.000001.00000
18500FL44644490.000010.99999
18501FL44644510.036240.96376
18502FL44644530.000001.00000
18503FL44644550.002260.99774
18504FL44644570.764470.23553
18505FL44644590.000040.99996
18506FL44644600.985780.01422
18507FL44644650.000170.99983
18508FL44644680.000100.99990
18509FL44644690.002360.99764
18510FL44644710.078990.92101
18511FL44644720.107200.89280
18512FL44644740.999990.00001
18513FL44644760.000001.00000
18514FL44644770.000010.99999
18515FL44644800.873230.12677
18516FL44644820.889420.11058
\n", "

18517 rows × 3 columns

\n", "
" ], "text/plain": [ " image portraits protests\n", "0 FL1817297 0.00000 1.00000\n", "1 FL1817300 0.00766 0.99234\n", "2 FL1817303 0.00005 0.99995\n", "3 FL1817307 0.00010 0.99990\n", "4 FL1817310 0.00000 1.00000\n", "5 FL1817311 0.97723 0.02277\n", "6 FL1817312 0.97215 0.02785\n", "7 FL1817313 0.00029 0.99971\n", "8 FL1817317 0.99869 0.00131\n", "9 FL1817320 1.00000 0.00000\n", "10 FL1817323 0.00072 0.99928\n", "11 FL1817324 0.02886 0.97114\n", "12 FL1817325 0.99999 0.00001\n", "13 FL1817328 1.00000 0.00000\n", "14 FL1817330 0.95103 0.04897\n", "15 FL1817335 1.00000 0.00000\n", "16 FL1817336 0.99618 0.00382\n", "17 FL1817337 0.99382 0.00618\n", "18 FL1817342 0.98216 0.01784\n", "19 FL1817344 0.99674 0.00326\n", "20 FL1817346 0.13909 0.86091\n", "21 FL1817348 0.91126 0.08874\n", "22 FL1817349 0.00000 1.00000\n", "23 FL1817350 0.00000 1.00000\n", "24 FL1817351 0.00000 1.00000\n", "25 FL1817352 0.00000 1.00000\n", "26 FL1817356 0.00005 0.99995\n", "27 FL1817359 0.99467 0.00533\n", "28 FL1817360 0.00000 1.00000\n", "29 FL1817362 0.08009 0.91991\n", "... ... ... ...\n", "18487 FL4464422 0.99992 0.00008\n", "18488 FL4464425 0.00120 0.99880\n", "18489 FL4464428 0.00752 0.99248\n", "18490 FL4464430 0.00000 1.00000\n", "18491 FL4464433 1.00000 0.00000\n", "18492 FL4464435 0.00120 0.99880\n", "18493 FL4464436 0.54698 0.45302\n", "18494 FL4464438 0.95447 0.04553\n", "18495 FL4464440 0.00000 1.00000\n", "18496 FL4464441 0.00002 0.99998\n", "18497 FL4464442 0.00000 1.00000\n", "18498 FL4464444 0.00001 0.99999\n", "18499 FL4464447 0.00000 1.00000\n", "18500 FL4464449 0.00001 0.99999\n", "18501 FL4464451 0.03624 0.96376\n", "18502 FL4464453 0.00000 1.00000\n", "18503 FL4464455 0.00226 0.99774\n", "18504 FL4464457 0.76447 0.23553\n", "18505 FL4464459 0.00004 0.99996\n", "18506 FL4464460 0.98578 0.01422\n", "18507 FL4464465 0.00017 0.99983\n", "18508 FL4464468 0.00010 0.99990\n", "18509 FL4464469 0.00236 0.99764\n", "18510 FL4464471 0.07899 0.92101\n", "18511 FL4464472 0.10720 0.89280\n", "18512 FL4464474 0.99999 0.00001\n", "18513 FL4464476 0.00000 1.00000\n", "18514 FL4464477 0.00001 0.99999\n", "18515 FL4464480 0.87323 0.12677\n", "18516 FL4464482 0.88942 0.11058\n", "\n", "[18517 rows x 3 columns]" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.round(5)" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [], "source": [ "df.to_csv('classified.csv', index=False)" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('classified.csv')" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
portraitsprotests
count1.851700e+041.851700e+04
mean2.729989e-017.270011e-01
std4.200135e-014.200135e-01
min6.136425e-172.156611e-14
25%2.731243e-072.685431e-01
50%2.668783e-049.997332e-01
75%7.314569e-019.999998e-01
max1.000000e+001.000000e+00
\n", "
" ], "text/plain": [ " portraits protests\n", "count 1.851700e+04 1.851700e+04\n", "mean 2.729989e-01 7.270011e-01\n", "std 4.200135e-01 4.200135e-01\n", "min 6.136425e-17 2.156611e-14\n", "25% 2.731243e-07 2.685431e-01\n", "50% 2.668783e-04 9.997332e-01\n", "75% 7.314569e-01 9.999998e-01\n", "max 1.000000e+00 1.000000e+00" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [], "source": [ "portraits = df.loc[df['portraits'] > 0.95]" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "samples = portraits.sample(6)\n", "html = ''\n", "for image in samples['image'].tolist():\n", " html += ''.format(image)\n", "display(HTML(html))" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [], "source": [ "protests = df.loc[df['protests'] > 0.95]" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "samples = protests.sample(6)\n", "html = ''\n", "for image in samples['image'].tolist():\n", " html += ''.format(image)\n", "display(HTML(html))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }