{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import sys, os, re, time\n", "import urllib\n", "\n", "import numpy as np\n", "\n", "from IPython import parallel" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Downloading from flickr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This flickr parsing code is adapted from [here](http://megasnippets.com/source-codes/python/get_random_interesting_image_flickr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def extract_urls(html):\n", " \"\"\"Extract images URLs from a page.\"\"\"\n", " re_imageurl = re.compile(r'src=\"(http://farm\\d+.static.?flickr.com/\\d+/\\d+_\\w+.jpg)\"',re.IGNORECASE|re.DOTALL)\n", " urls = re_imageurl.findall(html)\n", " if len(urls)==0:\n", " return []\n", " return urls\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def urls_for_tag(tag='face', min_images=100, max_pages=20):\n", " \"\"\"get urls to flickr images with given tag(s)\n", "\n", " scrapes flickr search page\n", " \"\"\"\n", " urls = []\n", " page = 1\n", " while len(urls) < min_images and page <= max_pages:\n", " url = 'http://www.flickr.com/search/?q=%s&l=cc&ss=0&ct=0&mt=photos&w=all&adv=1&m=tags&page=%i' % (tag, page)\n", " print \"fetching %s\" % url\n", " urlfile = urllib.urlopen(url)\n", " # global html\n", " html= urlfile.read()\n", " # print html\n", " urlfile.close()\n", " page_urls = extract_urls(html)\n", " urls.extend(page_urls)\n", " print \"found %i images\" % len(urls)\n", " if not len(page_urls):\n", " print \"no new images\"\n", " break\n", " page += 1\n", " \n", " return urls" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "urls = urls_for_tag('portrait', 500)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def download_image(url, dest_dir='images'):\n", " \"\"\"download an image from a url into a directory\n", "\n", " returns the path to the downloaded image.\n", " \"\"\"\n", " import os\n", " basename = url.rsplit('/', 1)[-1]\n", " dest = os.path.join(dest_dir, basename)\n", " if not os.path.exists(dest_dir):\n", " os.makedirs(dest_dir)\n", " if os.path.exists(dest):\n", " print \"already have %s\" % dest\n", " return dest\n", " \n", " print \"downloading %s -> %s\" % (url, dest)\n", " urlf = urllib.urlopen(url)\n", " data = urlf.read()\n", " urlf.close()\n", " with open(dest, 'w') as f:\n", " f.write(data)\n", " return dest" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, initialize OpenCV for simple facial detection" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "HAAR_CASCADE_PATH = \"haarcascade_frontalface_default.xml\"\n", "# if you have opencv installed via homebrew, this would be in\n", "# /usr/local/share/OpenCV/haarcascades/\n", "\n", "import cv\n", "storage = cv.CreateMemStorage()\n", "cascade = cv.Load(HAAR_CASCADE_PATH)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then define a few functions for extracting faces from images" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def extract_faces(image, faces):\n", " \"\"\"Returns any faces in an image in a list of numpy arrays\"\"\"\n", " import numpy as np\n", " A = np.frombuffer(image.tostring(), dtype=np.uint8).reshape((image.height, image.width, image.nChannels))\n", " A = A[:,:,::-1]\n", " face_arrays = []\n", " for face in faces:\n", " Aface = A[face[1]:face[1]+face[3],face[0]:face[0]+face[2]]\n", " face_arrays.append(Aface)\n", " return face_arrays\n", "\n", "\n", "def detect_faces(filename):\n", " \"\"\"Loads an image into OpenCV, and detects faces\n", "\n", " returns None if no image is found,\n", " (filename, [list of numpy arrays]) if there are faces\n", " \"\"\"\n", " \n", " image = cv.LoadImage(filename)\n", " faces = []\n", " detected = cv.HaarDetectObjects(image, cascade, storage, 1.2, 2, cv.CV_HAAR_DO_CANNY_PRUNING, (100,100))\n", " if detected:\n", " for (x,y,w,h),n in detected:\n", " faces.append((x,y,w,h))\n", " if faces:\n", " return filename, extract_faces(image, faces)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And finally, a two-step function that downloads an image from a url,\n", "and detects faces in it." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def faces_in_url(url):\n", " \"\"\"detect faces in an image downloaded from a url\"\"\"\n", " img_path = download_image(url)\n", " return detect_faces(img_path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If the network doesn't work,\n", "you can just generate a list of paths to images on your computer.\n", "For instance, these pictures are just everything from my iPhoto thumbnails directory,\n", "so vary from ~320x240 - 1024x768" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import glob\n", "library = os.path.expanduser(\"~/Pictures/2013.iphotolibrary\")\n", "pictures = []\n", "for directory, subdirs, files in os.walk(os.path.join(library, 'Thumbnails')):\n", " for fname in files:\n", " if fname.endswith('.jpg'):\n", " pictures.append(os.path.join(directory, fname))\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Or this one, which globs pictures from a particular folder:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import glob\n", "pictures = glob.glob(\"images/*/*.jpg\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's test our " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for url in urls:\n", " found = faces_in_url(url)\n", " if found:\n", " break\n", "\n", "filename, faces = found\n", "for face in faces:\n", " plt.figure()\n", " plt.imshow(face)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If the network isn't kind to you, we can skip the downloads,\n", "and just use pictures we have on the filesystem:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for p in pictures:\n", " found = detect_faces(p)\n", " if found:\n", " break\n", "\n", "filename, faces = found\n", "for face in faces:\n", " plt.figure()\n", " plt.imshow(face)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hey, that looks like a face!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Now in parallel" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, we connect our parallel Client" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rc = parallel.Client()\n", "all_engines = rc[:]\n", "view = rc.load_balanced_view()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then we initialize OpenCV on all of the engines (identical to what we did above)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%px\n", "%cd notebooks/parallel" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%px\n", "HAAR_CASCADE_PATH = \"haarcascade_frontalface_default.xml\"\n", "\n", "import os, urllib\n", "import cv\n", "storage = cv.CreateMemStorage()\n", "cascade = cv.Load(HAAR_CASCADE_PATH)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "and make sure `extract_faces` is defined everywhere" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "all_engines.push(dict(\n", " extract_faces=extract_faces,\n", " detect_faces=detect_faces,\n", " download_image=download_image,\n", "))\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can iterate through all of our pictures, and detect and display any faces we find" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tic = time.time()\n", "# if you are running offline, do this one:\n", "# f = detect_faces\n", "# source = pictures\n", "\n", "# or you can download each image as part of the task:\n", "f = faces_in_url\n", "source = urls\n", "\n", "\n", "amr = view.map_async(f, source[:1000], ordered=False)\n", "nfound = 0\n", "for r in amr:\n", " if not r:\n", " continue\n", " filename, faces = r\n", " nfound += len(faces)\n", " print \"%i faces found in %s\" % (len(faces), filename)\n", " for face in faces:\n", " plt.imshow(face)\n", " plt.show()\n", "\n", "toc = time.time()\n", "\n", "print \"found %i faces in %i images in %f s\" % (nfound, len(amr), toc-tic)\n" ] } ], "metadata": {}, "nbformat": 4, "nbformat_minor": 0 }