{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Exploring the face data\n", "\n", "This notebook plays around with the [data](faces_per_image.csv) generated by running the facial detection script [over the whole Tribune collection](Finding-all-the-faces-in-the-Tribune-collection.ipynb)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import altair as alt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('faces_per_image.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
faces
count60364.000000
mean3.821433
std6.661479
min0.000000
25%1.000000
50%2.000000
75%4.000000
max174.000000
\n", "
" ], "text/plain": [ " faces\n", "count 60364.000000\n", "mean 3.821433\n", "std 6.661479\n", "min 0.000000\n", "25% 1.000000\n", "50% 2.000000\n", "75% 4.000000\n", "max 174.000000" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
facesimage
20402174FL4470426.jpg
28301117FL4494940.jpg
34768145FL4517063.jpg
36351111FL4522209.jpg
36353105FL4522212.jpg
37843109FL4526927.jpg
49708127FL4564048.jpg
50428104FL4566376.jpg
53969132FL4577551.jpg
54041110FL4577772.jpg
54044144FL4577775.jpg
54062112FL4577793.jpg
54172126FL4578163.jpg
54173111FL4578164.jpg
\n", "
" ], "text/plain": [ " faces image\n", "20402 174 FL4470426.jpg\n", "28301 117 FL4494940.jpg\n", "34768 145 FL4517063.jpg\n", "36351 111 FL4522209.jpg\n", "36353 105 FL4522212.jpg\n", "37843 109 FL4526927.jpg\n", "49708 127 FL4564048.jpg\n", "50428 104 FL4566376.jpg\n", "53969 132 FL4577551.jpg\n", "54041 110 FL4577772.jpg\n", "54044 144 FL4577775.jpg\n", "54062 112 FL4577793.jpg\n", "54172 126 FL4578163.jpg\n", "54173 111 FL4578164.jpg" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# More than 100 faces!?\n", "\n", "df.loc[df['faces'] > 100]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The frame has ~60k rows, more than Altair's default 5,000-row limit for embedded data,\n", "# so have Altair write the data to a separate JSON file and reference it from the chart spec.\n", "alt.data_transformers.enable('json')\n", "\n", "# Histogram of the number of faces detected per image (at most 100 bins).\n", "alt.Chart(df).mark_bar().encode(\n", "    x=alt.X('faces:Q', bin=alt.BinParams(maxbins=100), title='Faces detected per image'),\n", "    y=alt.Y('count():Q', title='Number of images')\n", ").properties(\n", "    title='Distribution of detected faces per image'\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "230677" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total number of faces detected across every image in the dataset\n", "df['faces'].sum()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 }