{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Reverse-Visual-Search.ipynb", "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "#### Prepare dataset" ], "metadata": { "id": "6cNR7HLT0O0f" } }, { "cell_type": "markdown", "source": [ "We will do reverse visual search on [LFW(Labeled Faces in the Wild)](http://vis-www.cs.umass.edu/lfw/) dataset. [Download](http://vis-www.cs.umass.edu/lfw/lfw.tgz) to have a direct look.\n", "\n", "\n", "Download dataset, extract all images" ], "metadata": { "id": "fQCTXzyw0UeI" } }, { "cell_type": "code", "source": [ "import os\n", "import requests\n", "if not os.path.exists('lfw.tgz'):\n", " open('lfw.tgz', 'wb').write(requests.get('http://vis-www.cs.umass.edu/lfw/lfw.tgz').content)\n", "\n", "import tarfile\n", "tar = tarfile.open(\"lfw.tgz\")\n", "tar.extractall()\n", "tar.close()" ], "metadata": { "id": "xxOEJKtv04k_" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from os import listdir\n", "\n", "extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']\n", "\n", "\n", "def prepare_data():\n", " dirs = []\n", " files = []\n", " for root, _, filenames in os.walk('lfw'):\n", " for filename in filenames:\n", " ext = os.path.splitext(filename)[1]\n", " if ext in extensions:\n", " dir = root.split('/')[1]\n", " filepath = os.path.join(root, filename)\n", " dirs.append(dir)\n", " files.append(filepath)\n", " return dirs, files\n", "\n", "Y, X = prepare_data()\n", "\n", "# lfw data\n", "TRAIN_SAMPLES = 13233\n", "IMG_WIDTH, IMG_HEIGHT = 224, 224\n", "NUM_CLASSES = 5749" ], "metadata": { "id": "VwNZm4Jk2hvl" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Download preprocessed data" ], "metadata": { "id": "icT87qMOFAvN" } }, { "cell_type": "code", "source": [ "def download_preprocessed_files():\n", " file_names = ['facenet_keras.h5', 'features-deepface.pickle',\n", " 'features-facenet.pickle', 'features-inception.pickle',\n", " 'features-resnet.pickle', 'features-vgg16.pickle', \n", " 'features-vggface.pickle', 'features-xception.pickle']\n", " base_url = 'https://dl.acytoo.com/rvs/'\n", " for f in file_names:\n", " if not os.path.exists(f):\n", " open(f, 'wb').write(requests.get(base_url + f).content)\n", "\n", "download_preprocessed_files()" ], "metadata": { "id": "5Od5SjpPE-aY" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "#### First approach\n", "\n" ], "metadata": { "id": "XhEcCjPryeTW" } }, { "cell_type": "markdown", "source": [ "Using pretrained neural networks, remove the last layer, extracting features.\n", "\n", "Part of the code learned from https://github.com/PracticalDL/Practical-Deep-Learning-Book/tree/master/code/chapter-4\n" ], "metadata": { "id": "gmkntT6Hy8Dn" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "H-m3uSazyddJ" }, "outputs": [], "source": [ "!pip install mtcnn\n", "!pip install git+https://github.com/rcmalli/keras-vggface.git\n", "!pip install keras_applications\n", "!pip install faiss-gpu\n", "filename = \"/usr/local/lib/python3.7/dist-packages/keras_vggface/models.py\"\n", "text = open(filename).read()\n", "open(filename, \"w+\").write(text.replace('keras.engine.topology', 'tensorflow.keras.utils'))\n", "import mtcnn\n", "from PIL import Image\n", "import numpy as np\n", "from numpy.linalg import norm\n", "import pickle\n", "from tqdm import notebook\n", "import os\n", "import random\n", "import time\n", "import math\n", "import tensorflow\n", "import tarfile\n", "from sklearn.neighbors import NearestNeighbors\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import matplotlib.image as mpimg\n", "from tensorflow.keras.preprocessing import image\n", "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", "from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input\n", "from tensorflow.keras.applications.vgg16 import VGG16\n", "from tensorflow.keras.applications.mobilenet import MobileNet\n", "from tensorflow.keras.applications.inception_v3 import InceptionV3\n", "from tensorflow.keras.applications.xception import Xception\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D\n", "from sklearn.decomposition import PCA\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from keras.models import load_model\n", "from keras_vggface.vggface import VGGFace\n", "from sklearn.neighbors import KNeighborsClassifier\n", "import faiss\n" ] }, { "cell_type": "code", "source": [ "number_of_neighbors = 5\n", "\n", "\n", "def model_picker(name):\n", " if (name == 'vgg16'):\n", " model = VGG16(weights='imagenet',\n", " include_top=False,\n", " input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),\n", " pooling='max')\n", " elif (name == 'mobilenet'):\n", " model = MobileNet(weights='imagenet',\n", " include_top=False,\n", " input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),\n", " pooling='max',\n", " depth_multiplier=1,\n", " alpha=1)\n", " elif (name == 'inception'):\n", " model = InceptionV3(weights='imagenet',\n", " include_top=False,\n", " input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),\n", " pooling='max')\n", " elif (name == 'resnet'):\n", " model = ResNet50(weights='imagenet',\n", " include_top=False,\n", " input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),\n", " pooling='max')\n", " elif (name == 'xception'):\n", " model = Xception(weights='imagenet',\n", " include_top=False,\n", " input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),\n", " pooling='max')\n", " elif (name == 'facenet'):\n", " model = load_model('facenet_keras.h5', compile=False)\n", " elif (name == 'vggface'):\n", " model = VGGFace(model='resnet50', include_top=False, input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), pooling='avg')\n", " else:\n", " model = None\n", " print(\"Specified model not available\")\n", " return model\n", "\n" ], "metadata": { "id": "i6uYmChVE58-" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "model_architecture = 'facenet'\n", "model = model_picker(model_architecture)" ], "metadata": { "id": "pGPWTyVHB1ux" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### Extract features function" ], "metadata": { "id": "TJLhrsewIV9V" } }, { "cell_type": "code", "source": [ "def extract_features_from_one(img_path, model):\n", " \"\"\"\n", " Extract features from one image\n", " \"\"\"\n", " input_shape = (IMG_WIDTH, IMG_HEIGHT, 3)\n", " img = image.load_img(img_path,\n", " target_size=(input_shape[0], input_shape[1]))\n", " img_array = image.img_to_array(img)\n", " expanded_img_array = np.expand_dims(img_array, axis=0)\n", " preprocessed_img = preprocess_input(expanded_img_array)\n", " features = model.predict(preprocessed_img)\n", " flattened_features = features.flatten()\n", " normalized_features = flattened_features / norm(flattened_features)\n", " return normalized_features" ], "metadata": { "id": "hqIgh4nXIO8X" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### Save features function" ], "metadata": { "id": "2-9f0Bb7IYcX" } }, { "cell_type": "code", "source": [ "def save_features(features_list, filename):\n", " pickle.dump(features_list, open(filename, 'wb'))" ], "metadata": { "id": "ULwlXCaiIaQp" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### extract features" ], "metadata": { "id": "UilQjGOlIuHb" } }, { "cell_type": "code", "source": [ "def extract_features(image_paths, model):\n", " features_all = []\n", " for image_path in image_paths:\n", " features_all.append(extract_features_from_one(X[0], model))\n", " return features_all\n", "\n", "features_all = extract_features(X, model)\n", "save_features(features_all, model_architecture + '_features.pickle')" ], "metadata": { "id": "8uWxVnyqItwH" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Feature extraction takes quite a long time, load pre processed features:" ], "metadata": { "id": "7lIYgF8GLrz5" } }, { "cell_type": "code", "source": [ "def read_all_files():\n", " standard_feature_list = []\n", " with open('features-' + model_architecture +'.pickle', 'rb') as f:\n", " standard_feature_list = pickle.load(f)\n", " return standard_feature_list\n", "\n", "\n", "standard_feature_list = read_all_files()" ], "metadata": { "id": "T_WxPbwMJXhZ" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### Visual search" ], "metadata": { "id": "42kU6BSdMgf7" } }, { "cell_type": "code", "source": [ "neighbors = NearestNeighbors(n_neighbors=number_of_neighbors,\n", " algorithm='brute',\n", " metric='euclidean').fit(standard_feature_list, Y)" ], "metadata": { "id": "oPEnBkvkMi0a" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Helper function to get the classname\n", "def classname(str):\n", " return str.split('/')[-2]\n", "\n", "\n", "# Helper function to get the classname and filename\n", "def classname_filename(str):\n", " return str.split('/')[-2] + '/' + str.split('/')[-1]\n", "\n", "\n", "# Helper functions to plot the nearest images given a query image\n", "def plot_images(filenames, distances):\n", " images = []\n", " for filename in filenames:\n", " images.append(mpimg.imread(filename))\n", " plt.figure(figsize=(20, 10))\n", " columns = 5\n", " for i, image in enumerate(images):\n", " ax = plt.subplot(len(images) / columns + 1, columns, i + 1)\n", " if i == 0:\n", " ax.set_title(\"Query Image\\n\" + classname_filename(filenames[i]))\n", " else:\n", " ax.set_title(\"Similar Image\\n\" + classname_filename(filenames[i]) +\n", " \"\\nDistance: \" +\n", " str(float(\"{0:.2f}\".format(distances[i]))))\n", " plt.imshow(image)" ], "metadata": { "id": "ntSRhB5aM13K" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "test_idx = random.randint(0, len(X))\n", "test_file, test_label = X[test_idx], Y[test_idx]\n", "\n", "test_feature = extract_features_from_one(test_file, model)\n", "distances, indices = neighbors.kneighbors([test_feature])\n", "similar_image_paths = [test_file] + \\\n", " [X[indices[0][j]] for j in range(1, 5)]\n", "plot_images(similar_image_paths, distances[0])" ], "metadata": { "id": "TzPyqNo7M-rI" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "#### Improvement" ], "metadata": { "id": "BWod9-mBy87K" } }, { "cell_type": "markdown", "source": [ "##### Extract faceial part" ], "metadata": { "id": "ToaniixHPglT" } }, { "cell_type": "code", "source": [ "def extract_face(filename, required_size=(160, 160)):\n", "\t# load image from file\n", "\timage = Image.open(filename)\n", "\t# convert to RGB, if needed\n", "\timage = image.convert('RGB')\n", "\t# convert to array\n", "\tpixels = np.asarray(image)\n", "\t# create the detector, using default weights\n", "\tdetector = mtcnn.MTCNN()\n", "\t# detect faces in the image\n", "\tresults = detector.detect_faces(pixels)\n", "\t# extract the bounding box from the first face\n", "\tx1, y1, width, height = results[0]['box']\n", "\t# bug fix\n", "\tx1, y1 = abs(x1), abs(y1)\n", "\tx2, y2 = x1 + width, y1 + height\n", "\t# extract the face\n", "\tface = pixels[y1:y2, x1:x2]\n", "\t# resize pixels to the model size\n", "\timage = Image.fromarray(face)\n", "\timage = image.resize(required_size)\n", "\tface_array = np.asarray(image)\n", "\treturn face_array" ], "metadata": { "id": "l18FafQfPfxA" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### Get embeddings" ], "metadata": { "id": "MoIQPGcrPy-f" } }, { "cell_type": "code", "source": [ "# get the face embedding for one face\n", "def get_embedding(model, face_pixels):\n", "\t# scale pixel values\n", "\tface_pixels = face_pixels.astype('float32')\n", "\t# standardize pixel values across channels (global)\n", "\tmean, std = face_pixels.mean(), face_pixels.std()\n", "\tface_pixels = (face_pixels - mean) / std\n", "\t# transform face into one sample\n", "\tsamples = np.expand_dims(face_pixels, axis=0)\n", "\t# make prediction to get embedding\n", "\tyhat = model.predict(samples)\n", "\treturn yhat[0]" ], "metadata": { "id": "p1b1Da3nP0I5" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### Facenet model" ], "metadata": { "id": "47cgq86WP4Zv" } }, { "cell_type": "code", "source": [ "from tqdm import notebook\n", "newTrainX = []\n", "for i in notebook.tqdm(range(len(X))):\n", " detected_faces = extract_face(X[i])\n", " embedding = get_embedding(model, detected_faces)\n", " newTrainX.append(embedding)" ], "metadata": { "id": "dCMvHBLHP1ea" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### similar search: nearest neighbors" ], "metadata": { "id": "Z5Doi7ziQ_-b" } }, { "cell_type": "code", "source": [ "newTrainX = standard_feature_list\n", "neighbors = NearestNeighbors(n_neighbors=number_of_neighbors,\n", " algorithm='kd_tree',\n", " metric='euclidean').fit(newTrainX, Y)" ], "metadata": { "id": "wMXeUaYjP5Z3" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "test_idx = random.randint(0, len(X))\n", "test_file, test_label = X[test_idx], Y[test_idx]\n", "\n", "detected_faces = extract_face(test_file)\n", "embedding = get_embedding(model, detected_faces)\n", "distances, indices = neighbors.kneighbors([embedding])\n", "similar_image_paths = [test_file] + \\\n", " [X[indices[0][j]] for j in range(1, 5)]\n", "plot_images(similar_image_paths, distances[0])" ], "metadata": { "id": "MqQLr-5gRQnD" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "" ], "metadata": { "id": "BKXH8JMbRlnY" }, "execution_count": null, "outputs": [] } ] }