{
 "metadata": {
  "name": "",
  "signature": "sha256:2e87da550d07e2437ba3a6e38c27a1b09738a5069fc9e1c0fa41e699c5335b66"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Video data mangling\n",
      "===================\n",
      "\n",
      "Getting ready to analysis"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import os\n",
      "import concurrent.futures\n",
      "\n",
      "import numpy as np\n",
      "\n",
      "from utils import utils, video"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Some data sanity validations, affine transformation, and area clipping"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "a_data = video.load_table('A')\n",
      "b_data = video.load_table('B')\n",
      "metadata = video.load_metadata()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Filter using the index and the metadata about the starting and ending of each block."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "a_index, b_index = [], []\n",
      "for b in ['block 1', 'block 2', 'block 3']:\n",
      "    a_index.extend(utils.f_range(metadata[b]['start'], metadata[b]['end'], 0.5))\n",
      "for b in ['block 5', 'block 6', 'block 7']:\n",
      "    b_index.extend(utils.f_range(metadata[b]['start'], metadata[b]['end'], 0.5))\n",
      "\n",
      "groups = {'A': a_data.loc[a_index], 'B': b_data.loc[b_index]}"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Analyze the percentage of existing data in the dataset (percentage of not NaN values). Fill NaNs using forward filling."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for name, group_data in sorted(groups.items()):\n",
      "    exists = group_data.notnull().as_matrix().flatten()\n",
      "    exist_percentage = len(exists[exists == True]) / len(exists)\n",
      "    print('Group {}, percentage of not NaN values: {}'.format(name, 100 * exist_percentage))\n",
      "    group_data.fillna(method='ffill', inplace=True)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Group A, percentage of not NaN values: 99.93267186392629\n",
        "Group B, percentage of not NaN values: 99.85931513409962\n"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Transform the view usign the metadata. And clip to reasonable dance floor boundaries (after calculating irregularities)."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "known_ys = metadata['static points']['absolute']\n",
      "a_known_xs = metadata['static points']['group a']\n",
      "b_known_xs = metadata['static points']['group b']\n",
      "\n",
      "with concurrent.futures.ProcessPoolExecutor() as executor:\n",
      "    future_a = executor.submit(video.transform, groups['A'], a_known_xs, known_ys)\n",
      "    future_b = executor.submit(video.transform, groups['B'], b_known_xs, known_ys)\n",
      "groups['A'] = future_a.result()\n",
      "groups['B'] = future_b.result()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Clip to reasonable dance floor boundaries (after calculating irregularities)."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "lower = -4\n",
      "upper = 15\n",
      "\n",
      "rows_list = []\n",
      "for name in sorted(groups):\n",
      "    values = groups[name].values\n",
      "    total = len(values.flatten())\n",
      "    below_bounds = len(values[lower > values])\n",
      "    above_bounds = len(values[upper < values])\n",
      "    in_bounds = total - below_bounds - above_bounds\n",
      "    rows_list.append([name, 100 * below_bounds / total, 100 * above_bounds / total, 100 * in_bounds / total])\n",
      "    groups[name] = groups[name].clip(lower=lower, upper=upper)\n",
      "    print('Group: {}'.format(name))\n",
      "    print('Percentage of values below boundarie: {}'.format(100 * below_bounds / total))\n",
      "    print('Percentage of values above boundarie: {}'.format(100 * above_bounds / total))\n",
      "    print('Total in boundaries: {}'.format(100 * in_bounds / total))\n",
      "    print()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Group: A\n",
        "Percentage of values below boundarie: 4.557051736357193\n",
        "Percentage of values above boundarie: 2.273210489014883\n",
        "Total in boundaries: 93.16973777462792\n",
        "\n",
        "Group: B\n",
        "Percentage of values below boundarie: 0.05836925287356322\n",
        "Percentage of values above boundarie: 0.11673850574712644\n",
        "Total in boundaries: 99.82489224137932\n",
        "\n"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Generate bench position\n",
      "There was a bench in the experiment area where participants gathered together some of the time. Calculate its position based on `metadata['bench']`."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "benches = []  # the positioning of the bench according to the two videos (different groups)\n",
      "for name, known_xs in [('a', a_known_xs), ('b', b_known_xs)]:\n",
      "    A, b = video.solve_affine_transformation(known_xs, known_ys)\n",
      "    benches.append(video.transform_vec(metadata['bench']['group {}'.format(name)], A, b))\n",
      "\n",
      "# distance between positions (error wise)\n",
      "distance = utils.euclidean_distance(*benches)\n",
      "print('Distance of the error between the position values:', distance, '(meters)')\n",
      "\n",
      "# position of the bench will be the mean of the two\n",
      "bench_pos = np.array(benches).mean(axis=0)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Distance of the error between the position values: 0.379929100616 (meters)\n"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Save data to csv files"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for name, group in  groups.items():\n",
      "    group.index.name = 'frame'\n",
      "    group.to_csv(os.path.join('cooked', 'video_group_{}.csv'.format(name.lower())))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "np.savetxt(os.path.join('cooked', 'video_boundaries.csv'), (lower, upper))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "np.savetxt(os.path.join('cooked', 'video_bench_pos.csv'), bench_pos)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 10
    }
   ],
   "metadata": {}
  }
 ]
}