{ "metadata": { "name": "", "signature": "sha256:2e87da550d07e2437ba3a6e38c27a1b09738a5069fc9e1c0fa41e699c5335b66" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Video data mangling\n", "===================\n", "\n", "Getting ready for analysis" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import os\n", "import concurrent.futures\n", "\n", "import numpy as np\n", "\n", "from utils import utils, video" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Some data sanity validations, affine transformation, and area clipping" ] }, { "cell_type": "code", "collapsed": false, "input": [ "a_data = video.load_table('A')\n", "b_data = video.load_table('B')\n", "metadata = video.load_metadata()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Filter using the index and the metadata about the starting and ending of each block." ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Gather, per group, the index labels covered by that group's blocks.\n", "# NOTE(review): utils.f_range is presumably a float range with a 0.5 step -- confirm in utils.\n", "a_index = [\n", "    label\n", "    for block in ('block 1', 'block 2', 'block 3')\n", "    for label in utils.f_range(metadata[block]['start'], metadata[block]['end'], 0.5)\n", "]\n", "b_index = [\n", "    label\n", "    for block in ('block 5', 'block 6', 'block 7')\n", "    for label in utils.f_range(metadata[block]['start'], metadata[block]['end'], 0.5)\n", "]\n", "\n", "# Select the rows of each table by those labels.\n", "groups = {'A': a_data.loc[a_index], 'B': b_data.loc[b_index]}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Analyze the percentage of existing data in the dataset (percentage of not NaN values). Fill NaNs using forward filling." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "for name, group_data in sorted(groups.items()):\n", "    # Boolean mask of present (non-NaN) cells. DataFrame.as_matrix() was removed in\n", "    # pandas 1.0, so use .values, which works on old and new pandas alike.\n", "    exists = group_data.notnull().values\n", "    exist_percentage = np.count_nonzero(exists) / exists.size\n", "    print('Group {}, percentage of not NaN values: {}'.format(name, 100 * exist_percentage))\n", "    # Forward-fill into a new frame rather than mutating the .loc selection in place;\n", "    # sorted() above iterates over a list, so rebinding the dict value here is safe.\n", "    groups[name] = group_data.ffill()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Group A, percentage of not NaN values: 99.93267186392629\n", "Group B, percentage of not NaN values: 99.85931513409962\n" ] } ], "prompt_number": 4 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Transform the view using the metadata, then clip to reasonable dance floor boundaries (after calculating irregularities)." ] }, { "cell_type": "code", "collapsed": false, "input": [ "known_ys = metadata['static points']['absolute']\n", "a_known_xs = metadata['static points']['group a']\n", "b_known_xs = metadata['static points']['group b']\n", "\n", "with concurrent.futures.ProcessPoolExecutor() as executor:\n", "    future_a = executor.submit(video.transform, groups['A'], a_known_xs, known_ys)\n", "    future_b = executor.submit(video.transform, groups['B'], b_known_xs, known_ys)\n", "groups['A'] = future_a.result()\n", "groups['B'] = future_b.result()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Clip to reasonable dance floor boundaries (after calculating irregularities)." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "# Clipping bounds applied to every coordinate (presumably meters -- TODO confirm).\n", "lower, upper = -4, 15\n", "\n", "rows_list = []\n", "for name, frame in sorted(groups.items()):\n", "    cells = frame.values\n", "    total = cells.size\n", "    below_bounds = np.count_nonzero(cells < lower)\n", "    above_bounds = np.count_nonzero(cells > upper)\n", "    in_bounds = total - below_bounds - above_bounds\n", "    # Share of all cells below / above / inside the bounds, in percent.\n", "    below_pct, above_pct, in_pct = (\n", "        100 * count / total for count in (below_bounds, above_bounds, in_bounds)\n", "    )\n", "    rows_list.append([name, below_pct, above_pct, in_pct])\n", "    # The counts above are taken before clipping; now replace the group with its clipped version.\n", "    groups[name] = frame.clip(lower=lower, upper=upper)\n", "    print('Group: {}'.format(name))\n", "    print('Percentage of values below boundarie: {}'.format(below_pct))\n", "    print('Percentage of values above boundarie: {}'.format(above_pct))\n", "    print('Total in boundaries: {}'.format(in_pct))\n", "    print()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Group: A\n", "Percentage of values below boundarie: 4.557051736357193\n", "Percentage of values above boundarie: 2.273210489014883\n", "Total in boundaries: 93.16973777462792\n", "\n", "Group: B\n", "Percentage of values below boundarie: 0.05836925287356322\n", "Percentage of values above boundarie: 0.11673850574712644\n", "Total in boundaries: 99.82489224137932\n", "\n" ] } ], "prompt_number": 6 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Generate bench position\n", "There was a bench in the experiment area where participants gathered together some of the time. Calculate its position based on `metadata['bench']`." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "benches = []  # the positioning of the bench according to the two videos (different groups)\n", "for name, known_xs in [('a', a_known_xs), ('b', b_known_xs)]:\n", "    # Solve the affine map from this video's static points, then push the\n", "    # video's bench coordinates through it.\n", "    A, b = video.solve_affine_transformation(known_xs, known_ys)\n", "    benches.append(video.transform_vec(metadata['bench']['group {}'.format(name)], A, b))\n", "\n", "# distance between the two transformed positions (a measure of the error)\n", "distance = utils.euclidean_distance(*benches)\n", "print('Distance of the error between the position values:', distance, '(meters)')\n", "\n", "# position of the bench will be the mean of the two\n", "bench_pos = np.array(benches).mean(axis=0)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Distance of the error between the position values: 0.379929100616 (meters)\n" ] } ], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Save data to csv files" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Make sure the output directory exists so the notebook runs end to end on a fresh checkout.\n", "os.makedirs('cooked', exist_ok=True)\n", "\n", "for name, group in groups.items():\n", "    group.index.name = 'frame'\n", "    group.to_csv(os.path.join('cooked', 'video_group_{}.csv'.format(name.lower())))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "np.savetxt(os.path.join('cooked', 'video_boundaries.csv'), (lower, upper))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "np.savetxt(os.path.join('cooked', 'video_bench_pos.csv'), bench_pos)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 10 } ], "metadata": {} } ] }