{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exploring the face data\n",
"\n",
"This notebook explores the [data](faces_per_image.csv) generated by running the facial detection script [over the whole Tribune collection](Finding-all-the-faces-in-the-Tribune-collection.ipynb). Each row of the CSV pairs an image file name (`image`) with the number of faces detected in it (`faces`)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import altair as alt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Read the CSV of face detection results into a DataFrame.\n",
"df = pd.read_csv(filepath_or_buffer='faces_per_image.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" faces | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 60364.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 3.821433 | \n",
"
\n",
" \n",
" std | \n",
" 6.661479 | \n",
"
\n",
" \n",
" min | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 2.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 4.000000 | \n",
"
\n",
" \n",
" max | \n",
" 174.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" faces\n",
"count 60364.000000\n",
"mean 3.821433\n",
"std 6.661479\n",
"min 0.000000\n",
"25% 1.000000\n",
"50% 2.000000\n",
"75% 4.000000\n",
"max 174.000000"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics for the numeric columns, with the default\n",
"# quartiles (25%, 50%, 75%) spelled out explicitly.\n",
"df.describe(percentiles=[0.25, 0.5, 0.75])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" faces | \n",
" image | \n",
"
\n",
" \n",
" \n",
" \n",
" 20402 | \n",
" 174 | \n",
" FL4470426.jpg | \n",
"
\n",
" \n",
" 28301 | \n",
" 117 | \n",
" FL4494940.jpg | \n",
"
\n",
" \n",
" 34768 | \n",
" 145 | \n",
" FL4517063.jpg | \n",
"
\n",
" \n",
" 36351 | \n",
" 111 | \n",
" FL4522209.jpg | \n",
"
\n",
" \n",
" 36353 | \n",
" 105 | \n",
" FL4522212.jpg | \n",
"
\n",
" \n",
" 37843 | \n",
" 109 | \n",
" FL4526927.jpg | \n",
"
\n",
" \n",
" 49708 | \n",
" 127 | \n",
" FL4564048.jpg | \n",
"
\n",
" \n",
" 50428 | \n",
" 104 | \n",
" FL4566376.jpg | \n",
"
\n",
" \n",
" 53969 | \n",
" 132 | \n",
" FL4577551.jpg | \n",
"
\n",
" \n",
" 54041 | \n",
" 110 | \n",
" FL4577772.jpg | \n",
"
\n",
" \n",
" 54044 | \n",
" 144 | \n",
" FL4577775.jpg | \n",
"
\n",
" \n",
" 54062 | \n",
" 112 | \n",
" FL4577793.jpg | \n",
"
\n",
" \n",
" 54172 | \n",
" 126 | \n",
" FL4578163.jpg | \n",
"
\n",
" \n",
" 54173 | \n",
" 111 | \n",
" FL4578164.jpg | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" faces image\n",
"20402 174 FL4470426.jpg\n",
"28301 117 FL4494940.jpg\n",
"34768 145 FL4517063.jpg\n",
"36351 111 FL4522209.jpg\n",
"36353 105 FL4522212.jpg\n",
"37843 109 FL4526927.jpg\n",
"49708 127 FL4564048.jpg\n",
"50428 104 FL4566376.jpg\n",
"53969 132 FL4577551.jpg\n",
"54041 110 FL4577772.jpg\n",
"54044 144 FL4577775.jpg\n",
"54062 112 FL4577793.jpg\n",
"54172 126 FL4578163.jpg\n",
"54173 111 FL4578164.jpg"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Some images apparently contain more than 100 faces -- list them.\n",
"\n",
"df[df['faces'].gt(100)]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
""
],
"text/plain": [
"alt.Chart(...)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The frame has ~60k rows, but Altair refuses to embed more than 5,000 rows\n",
"# in a chart by default. The 'json' transformer saves the data to a JSON\n",
"# file and has the chart reference it by URL instead of inlining it.\n",
"alt.data_transformers.enable('json')\n",
"\n",
"# Histogram of the number of faces detected per image.\n",
"alt.Chart(df, title='Distribution of faces detected per image').mark_bar().encode(\n",
"    x=alt.X(\n",
"        'faces:Q',\n",
"        bin=alt.BinParams(maxbins=100),\n",
"        title='Faces detected per image (binned)',\n",
"    ),\n",
"    y=alt.Y('count():Q', title='Number of images'),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"230677"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Total number of faces detected across all images.\n",
"df.loc[:, 'faces'].sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}