{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Classifying all the Tribune images\n",
"\n",
"Using the simple model we created in [this notebook](Training-a-classification-model-for-the-Tribune.ipynb), let's attempt to classify all the images in the Tribune collection."
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"from __future__ import absolute_import\n",
"from __future__ import division\n",
"from __future__ import print_function\n",
"\n",
"import pandas as pd\n",
"import os\n",
"from urllib.parse import urlparse\n",
"import requests\n",
"from IPython.display import display, HTML\n",
"import copy\n",
"from tqdm import tqdm_notebook\n",
"\n",
"\n",
"import sys\n",
"import time\n",
"\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"# Copyright 2017 The TensorFlow Authors. All Rights Reserved.\n",
"#\n",
"# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"#\n",
"# http://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License.\n",
"# ==============================================================================\n",
"\n",
"def load_graph(model_file):\n",
" graph = tf.Graph()\n",
" graph_def = tf.GraphDef()\n",
"\n",
" with open(model_file, \"rb\") as f:\n",
" graph_def.ParseFromString(f.read())\n",
" with graph.as_default():\n",
" tf.import_graph_def(graph_def)\n",
"\n",
" return graph\n",
"\n",
"def read_tensor_from_image_file(file_name, input_height=299, input_width=299,\n",
"\t\t\t\tinput_mean=0, input_std=255):\n",
" input_name = \"file_reader\"\n",
" output_name = \"normalized\"\n",
" file_reader = tf.read_file(file_name, input_name)\n",
" if file_name.endswith(\".png\"):\n",
" image_reader = tf.image.decode_png(file_reader, channels = 3,\n",
" name='png_reader')\n",
" elif file_name.endswith(\".gif\"):\n",
" image_reader = tf.squeeze(tf.image.decode_gif(file_reader,\n",
" name='gif_reader'))\n",
" elif file_name.endswith(\".bmp\"):\n",
" image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')\n",
" else:\n",
" image_reader = tf.image.decode_jpeg(file_reader, channels = 3,\n",
" name='jpeg_reader')\n",
" float_caster = tf.cast(image_reader, tf.float32)\n",
" dims_expander = tf.expand_dims(float_caster, 0);\n",
" resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])\n",
" normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])\n",
" sess = tf.Session()\n",
" result = sess.run(normalized)\n",
"\n",
" return result\n",
"\n",
"def load_labels(label_file):\n",
" label = []\n",
" proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()\n",
" for l in proto_as_ascii_lines:\n",
" label.append(l.rstrip())\n",
" return label\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"def label_tribune_image(file_name):\n",
" model_file = \"tensorflow-for-poets-2/tf_files/tribune_graph.pb\"\n",
" label_file = \"tensorflow-for-poets-2/tf_files/tribune_labels.txt\"\n",
" input_height = 224\n",
" input_width = 224\n",
" input_mean = 128\n",
" input_std = 128\n",
" input_layer = \"input\"\n",
" output_layer = \"final_result\"\n",
" graph = load_graph(model_file)\n",
" t = read_tensor_from_image_file(file_name,\n",
" input_height=input_height,\n",
" input_width=input_width,\n",
" input_mean=input_mean,\n",
" input_std=input_std)\n",
"\n",
" input_name = \"import/\" + input_layer\n",
" output_name = \"import/\" + output_layer\n",
" input_operation = graph.get_operation_by_name(input_name);\n",
" output_operation = graph.get_operation_by_name(output_name);\n",
"\n",
" with tf.Session(graph=graph) as sess:\n",
" start = time.time()\n",
" results = sess.run(output_operation.outputs[0],\n",
" {input_operation.outputs[0]: t})\n",
" end=time.time()\n",
" results = np.squeeze(results)\n",
"\n",
" top_k = results.argsort()[-5:][::-1]\n",
" labels = load_labels(label_file)\n",
" scores = {}\n",
" for i in top_k:\n",
" scores[labels[i]] = results[i]\n",
" return scores\n",
"\n",
"\n",
"def detect_all():\n",
" '''\n",
" I've already got copies of all the images, so I'll just point the script at them.\n",
" '''\n",
" face_data = []\n",
" image_dir = '/Volumes/bigdata/mydata/SLNSW/Tribune/images/500'\n",
" images = [i for i in os.listdir(image_dir) if i[-4:] == '.jpg']\n",
" for image in tqdm_notebook(images):\n",
" img_file = os.path.join(image_dir, image)\n",
" scores = label_tribune_image(img_file)\n",
" scores['image'] = image.replace('-500.jpg', '')\n",
" results.append(scores)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results = []\n",
"detect_all()"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(results)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" image | \n",
" portraits | \n",
" protests | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" FL1817297 | \n",
" 2.656395e-09 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" FL1817300 | \n",
" 7.659095e-03 | \n",
" 0.992341 | \n",
"
\n",
" \n",
" 2 | \n",
" FL1817303 | \n",
" 4.747240e-05 | \n",
" 0.999953 | \n",
"
\n",
" \n",
" 3 | \n",
" FL1817307 | \n",
" 1.007780e-04 | \n",
" 0.999899 | \n",
"
\n",
" \n",
" 4 | \n",
" FL1817310 | \n",
" 1.188131e-10 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" image portraits protests\n",
"0 FL1817297 2.656395e-09 1.000000\n",
"1 FL1817300 7.659095e-03 0.992341\n",
"2 FL1817303 4.747240e-05 0.999953\n",
"3 FL1817307 1.007780e-04 0.999899\n",
"4 FL1817310 1.188131e-10 1.000000"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" image | \n",
" portraits | \n",
" protests | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" FL1817297 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 1 | \n",
" FL1817300 | \n",
" 0.00766 | \n",
" 0.99234 | \n",
"
\n",
" \n",
" 2 | \n",
" FL1817303 | \n",
" 0.00005 | \n",
" 0.99995 | \n",
"
\n",
" \n",
" 3 | \n",
" FL1817307 | \n",
" 0.00010 | \n",
" 0.99990 | \n",
"
\n",
" \n",
" 4 | \n",
" FL1817310 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 5 | \n",
" FL1817311 | \n",
" 0.97723 | \n",
" 0.02277 | \n",
"
\n",
" \n",
" 6 | \n",
" FL1817312 | \n",
" 0.97215 | \n",
" 0.02785 | \n",
"
\n",
" \n",
" 7 | \n",
" FL1817313 | \n",
" 0.00029 | \n",
" 0.99971 | \n",
"
\n",
" \n",
" 8 | \n",
" FL1817317 | \n",
" 0.99869 | \n",
" 0.00131 | \n",
"
\n",
" \n",
" 9 | \n",
" FL1817320 | \n",
" 1.00000 | \n",
" 0.00000 | \n",
"
\n",
" \n",
" 10 | \n",
" FL1817323 | \n",
" 0.00072 | \n",
" 0.99928 | \n",
"
\n",
" \n",
" 11 | \n",
" FL1817324 | \n",
" 0.02886 | \n",
" 0.97114 | \n",
"
\n",
" \n",
" 12 | \n",
" FL1817325 | \n",
" 0.99999 | \n",
" 0.00001 | \n",
"
\n",
" \n",
" 13 | \n",
" FL1817328 | \n",
" 1.00000 | \n",
" 0.00000 | \n",
"
\n",
" \n",
" 14 | \n",
" FL1817330 | \n",
" 0.95103 | \n",
" 0.04897 | \n",
"
\n",
" \n",
" 15 | \n",
" FL1817335 | \n",
" 1.00000 | \n",
" 0.00000 | \n",
"
\n",
" \n",
" 16 | \n",
" FL1817336 | \n",
" 0.99618 | \n",
" 0.00382 | \n",
"
\n",
" \n",
" 17 | \n",
" FL1817337 | \n",
" 0.99382 | \n",
" 0.00618 | \n",
"
\n",
" \n",
" 18 | \n",
" FL1817342 | \n",
" 0.98216 | \n",
" 0.01784 | \n",
"
\n",
" \n",
" 19 | \n",
" FL1817344 | \n",
" 0.99674 | \n",
" 0.00326 | \n",
"
\n",
" \n",
" 20 | \n",
" FL1817346 | \n",
" 0.13909 | \n",
" 0.86091 | \n",
"
\n",
" \n",
" 21 | \n",
" FL1817348 | \n",
" 0.91126 | \n",
" 0.08874 | \n",
"
\n",
" \n",
" 22 | \n",
" FL1817349 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 23 | \n",
" FL1817350 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 24 | \n",
" FL1817351 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 25 | \n",
" FL1817352 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 26 | \n",
" FL1817356 | \n",
" 0.00005 | \n",
" 0.99995 | \n",
"
\n",
" \n",
" 27 | \n",
" FL1817359 | \n",
" 0.99467 | \n",
" 0.00533 | \n",
"
\n",
" \n",
" 28 | \n",
" FL1817360 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 29 | \n",
" FL1817362 | \n",
" 0.08009 | \n",
" 0.91991 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 18487 | \n",
" FL4464422 | \n",
" 0.99992 | \n",
" 0.00008 | \n",
"
\n",
" \n",
" 18488 | \n",
" FL4464425 | \n",
" 0.00120 | \n",
" 0.99880 | \n",
"
\n",
" \n",
" 18489 | \n",
" FL4464428 | \n",
" 0.00752 | \n",
" 0.99248 | \n",
"
\n",
" \n",
" 18490 | \n",
" FL4464430 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 18491 | \n",
" FL4464433 | \n",
" 1.00000 | \n",
" 0.00000 | \n",
"
\n",
" \n",
" 18492 | \n",
" FL4464435 | \n",
" 0.00120 | \n",
" 0.99880 | \n",
"
\n",
" \n",
" 18493 | \n",
" FL4464436 | \n",
" 0.54698 | \n",
" 0.45302 | \n",
"
\n",
" \n",
" 18494 | \n",
" FL4464438 | \n",
" 0.95447 | \n",
" 0.04553 | \n",
"
\n",
" \n",
" 18495 | \n",
" FL4464440 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 18496 | \n",
" FL4464441 | \n",
" 0.00002 | \n",
" 0.99998 | \n",
"
\n",
" \n",
" 18497 | \n",
" FL4464442 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 18498 | \n",
" FL4464444 | \n",
" 0.00001 | \n",
" 0.99999 | \n",
"
\n",
" \n",
" 18499 | \n",
" FL4464447 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 18500 | \n",
" FL4464449 | \n",
" 0.00001 | \n",
" 0.99999 | \n",
"
\n",
" \n",
" 18501 | \n",
" FL4464451 | \n",
" 0.03624 | \n",
" 0.96376 | \n",
"
\n",
" \n",
" 18502 | \n",
" FL4464453 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 18503 | \n",
" FL4464455 | \n",
" 0.00226 | \n",
" 0.99774 | \n",
"
\n",
" \n",
" 18504 | \n",
" FL4464457 | \n",
" 0.76447 | \n",
" 0.23553 | \n",
"
\n",
" \n",
" 18505 | \n",
" FL4464459 | \n",
" 0.00004 | \n",
" 0.99996 | \n",
"
\n",
" \n",
" 18506 | \n",
" FL4464460 | \n",
" 0.98578 | \n",
" 0.01422 | \n",
"
\n",
" \n",
" 18507 | \n",
" FL4464465 | \n",
" 0.00017 | \n",
" 0.99983 | \n",
"
\n",
" \n",
" 18508 | \n",
" FL4464468 | \n",
" 0.00010 | \n",
" 0.99990 | \n",
"
\n",
" \n",
" 18509 | \n",
" FL4464469 | \n",
" 0.00236 | \n",
" 0.99764 | \n",
"
\n",
" \n",
" 18510 | \n",
" FL4464471 | \n",
" 0.07899 | \n",
" 0.92101 | \n",
"
\n",
" \n",
" 18511 | \n",
" FL4464472 | \n",
" 0.10720 | \n",
" 0.89280 | \n",
"
\n",
" \n",
" 18512 | \n",
" FL4464474 | \n",
" 0.99999 | \n",
" 0.00001 | \n",
"
\n",
" \n",
" 18513 | \n",
" FL4464476 | \n",
" 0.00000 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" 18514 | \n",
" FL4464477 | \n",
" 0.00001 | \n",
" 0.99999 | \n",
"
\n",
" \n",
" 18515 | \n",
" FL4464480 | \n",
" 0.87323 | \n",
" 0.12677 | \n",
"
\n",
" \n",
" 18516 | \n",
" FL4464482 | \n",
" 0.88942 | \n",
" 0.11058 | \n",
"
\n",
" \n",
"
\n",
"
18517 rows × 3 columns
\n",
"
"
],
"text/plain": [
" image portraits protests\n",
"0 FL1817297 0.00000 1.00000\n",
"1 FL1817300 0.00766 0.99234\n",
"2 FL1817303 0.00005 0.99995\n",
"3 FL1817307 0.00010 0.99990\n",
"4 FL1817310 0.00000 1.00000\n",
"5 FL1817311 0.97723 0.02277\n",
"6 FL1817312 0.97215 0.02785\n",
"7 FL1817313 0.00029 0.99971\n",
"8 FL1817317 0.99869 0.00131\n",
"9 FL1817320 1.00000 0.00000\n",
"10 FL1817323 0.00072 0.99928\n",
"11 FL1817324 0.02886 0.97114\n",
"12 FL1817325 0.99999 0.00001\n",
"13 FL1817328 1.00000 0.00000\n",
"14 FL1817330 0.95103 0.04897\n",
"15 FL1817335 1.00000 0.00000\n",
"16 FL1817336 0.99618 0.00382\n",
"17 FL1817337 0.99382 0.00618\n",
"18 FL1817342 0.98216 0.01784\n",
"19 FL1817344 0.99674 0.00326\n",
"20 FL1817346 0.13909 0.86091\n",
"21 FL1817348 0.91126 0.08874\n",
"22 FL1817349 0.00000 1.00000\n",
"23 FL1817350 0.00000 1.00000\n",
"24 FL1817351 0.00000 1.00000\n",
"25 FL1817352 0.00000 1.00000\n",
"26 FL1817356 0.00005 0.99995\n",
"27 FL1817359 0.99467 0.00533\n",
"28 FL1817360 0.00000 1.00000\n",
"29 FL1817362 0.08009 0.91991\n",
"... ... ... ...\n",
"18487 FL4464422 0.99992 0.00008\n",
"18488 FL4464425 0.00120 0.99880\n",
"18489 FL4464428 0.00752 0.99248\n",
"18490 FL4464430 0.00000 1.00000\n",
"18491 FL4464433 1.00000 0.00000\n",
"18492 FL4464435 0.00120 0.99880\n",
"18493 FL4464436 0.54698 0.45302\n",
"18494 FL4464438 0.95447 0.04553\n",
"18495 FL4464440 0.00000 1.00000\n",
"18496 FL4464441 0.00002 0.99998\n",
"18497 FL4464442 0.00000 1.00000\n",
"18498 FL4464444 0.00001 0.99999\n",
"18499 FL4464447 0.00000 1.00000\n",
"18500 FL4464449 0.00001 0.99999\n",
"18501 FL4464451 0.03624 0.96376\n",
"18502 FL4464453 0.00000 1.00000\n",
"18503 FL4464455 0.00226 0.99774\n",
"18504 FL4464457 0.76447 0.23553\n",
"18505 FL4464459 0.00004 0.99996\n",
"18506 FL4464460 0.98578 0.01422\n",
"18507 FL4464465 0.00017 0.99983\n",
"18508 FL4464468 0.00010 0.99990\n",
"18509 FL4464469 0.00236 0.99764\n",
"18510 FL4464471 0.07899 0.92101\n",
"18511 FL4464472 0.10720 0.89280\n",
"18512 FL4464474 0.99999 0.00001\n",
"18513 FL4464476 0.00000 1.00000\n",
"18514 FL4464477 0.00001 0.99999\n",
"18515 FL4464480 0.87323 0.12677\n",
"18516 FL4464482 0.88942 0.11058\n",
"\n",
"[18517 rows x 3 columns]"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.round(5)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('classified.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('classified.csv')"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" portraits | \n",
" protests | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1.851700e+04 | \n",
" 1.851700e+04 | \n",
"
\n",
" \n",
" mean | \n",
" 2.729989e-01 | \n",
" 7.270011e-01 | \n",
"
\n",
" \n",
" std | \n",
" 4.200135e-01 | \n",
" 4.200135e-01 | \n",
"
\n",
" \n",
" min | \n",
" 6.136425e-17 | \n",
" 2.156611e-14 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.731243e-07 | \n",
" 2.685431e-01 | \n",
"
\n",
" \n",
" 50% | \n",
" 2.668783e-04 | \n",
" 9.997332e-01 | \n",
"
\n",
" \n",
" 75% | \n",
" 7.314569e-01 | \n",
" 9.999998e-01 | \n",
"
\n",
" \n",
" max | \n",
" 1.000000e+00 | \n",
" 1.000000e+00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" portraits protests\n",
"count 1.851700e+04 1.851700e+04\n",
"mean 2.729989e-01 7.270011e-01\n",
"std 4.200135e-01 4.200135e-01\n",
"min 6.136425e-17 2.156611e-14\n",
"25% 2.731243e-07 2.685431e-01\n",
"50% 2.668783e-04 9.997332e-01\n",
"75% 7.314569e-01 9.999998e-01\n",
"max 1.000000e+00 1.000000e+00"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"portraits = df.loc[df['portraits'] > 0.95]"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"samples = portraits.sample(6)\n",
"html = ''\n",
"for image in samples['image'].tolist():\n",
" html += ''.format(image)\n",
"display(HTML(html))"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"protests = df.loc[df['protests'] > 0.95]"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"samples = protests.sample(6)\n",
"html = ''\n",
"for image in samples['image'].tolist():\n",
" html += ''.format(image)\n",
"display(HTML(html))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}