{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Basic image recognition with Tribune photos\n",
    "\n",
    "Based on the Tensorflow [image recognition tutorial](https://www.tensorflow.org/tutorials/images/image_recognition). This uses Inception-v3 to classify images according to these [1000 classes](http://image-net.org/challenges/LSVRC/2014/browse-synsets)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from urllib.parse import urlparse\n",
    "import pandas as pd\n",
    "import requests\n",
    "from IPython.display import display, HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load Tribune images data\n",
    "df = pd.read_csv('https://raw.githubusercontent.com/GLAM-Workbench/ozglam-data-records-of-resistance/master/data/images.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download and unpack the pretrained model\n",
    "%run classify_image.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def select_images(sample):\n",
    "    images = []\n",
    "    rows = df.sample(sample)\n",
    "    for img_id in list(rows['images']):\n",
    "        img_url = 'https://s3-ap-southeast-2.amazonaws.com/wraggetribune/images/500/{0}-500.jpg'.format(img_id)\n",
    "        images.append((img_id, img_url))\n",
    "    return images\n",
    "\n",
    "\n",
    "def download_image(img_url):\n",
    "    current_dir = os.getcwd()\n",
    "    parsed = urlparse(img_url)\n",
    "    filename = os.path.join(current_dir, os.path.basename(parsed.path))\n",
    "    response = requests.get(img_url, stream=True)\n",
    "    with open(filename, 'wb') as fd:\n",
    "        for chunk in response.iter_content(chunk_size=128):\n",
    "            fd.write(chunk)\n",
    "    return filename\n",
    "\n",
    "\n",
    "def recognise_images(sample=10):\n",
    "    images = select_images(sample)\n",
    "    for img_id, img_url in images:\n",
    "        filename = download_image(img_url)\n",
    "        display(HTML('<image src=\"{}\"><br><a target=\"_blank\" href=\"http://digital.sl.nsw.gov.au/delivery/DeliveryManagerServlet?dps_pid={}&embedded=true&toolbar=false\">More details at SLNSW</a>'.format(img_url, img_id)))\n",
    "        %run classify_image.py --image_file $filename\n",
    "        #%run label_image.py --image $filename\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "recognise_images(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}