{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#pandas (pd) and numpy (np) aliases are used by the cells below\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#load the model data (simulated days)\n",
    "model_path='ex/data/days-simulated-v2.tsv'\n",
    "df=pd.read_csv(model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>day</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0,270,5,32,10,73,16,25,5,165,2,35,4,300,1,53,1...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4,150,16,7,4,623,16,5,8,35,16,20,5,30,2,10,8,1...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0,270,1,75,16,30,9,15,16,5,8,40,16,5,10,10,16,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0,240,1,60,10,50,16,5,5,55,2,32,16,8,10,10,16,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0,170,2,20,16,5,4,285,2,15,3,205,16,5,6,6,16,1...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 day\n",
       "0  0,270,5,32,10,73,16,25,5,165,2,35,4,300,1,53,1...\n",
       "1  4,150,16,7,4,623,16,5,8,35,16,20,5,30,2,10,8,1...\n",
       "2  0,270,1,75,16,30,9,15,16,5,8,40,16,5,10,10,16,...\n",
       "3  0,240,1,60,10,50,16,5,5,55,2,32,16,8,10,10,16,...\n",
       "4  0,170,2,20,16,5,4,285,2,15,3,205,16,5,6,6,16,1..."
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#target structure of every row: activity, duration, activity, duration, ...\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#read the three raw input files\n",
    "df1,df2,df3=[pd.read_csv(path) for path in ('ex/1.csv','ex/2.csv','ex/3.csv')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#no need for headers twice (df headers completely identical): skip the first row of df2 and df3\n",
    "later_parts=[df2[1:],df3[1:]]\n",
    "df=pd.concat([df1]+later_parts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#create weekday dataframe from column positions 9..488; reset_index() keeps the old row index as a column\n",
    "weekday_cols=df.columns[9:489]\n",
    "hkoz=df[weekday_cols].reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#create weekend dataframe from column positions 489..968; reset_index() keeps the old row index as a column\n",
    "weekend_cols=df.columns[489:969]\n",
    "hetv=df[weekend_cols].reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#create descriptive stats dataframe from every column at position 969 and beyond\n",
    "desc_cols=df.columns[969:]\n",
    "desc=df[desc_cols].reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#create survey metadata dataframe from column positions 2 and 3\n",
    "meta_cols=df.columns[2:4]\n",
    "time=df[meta_cols].reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#top 16 activity groups\n",
    "activities=[['Alv\\xc3\\xa1s'],\n",
    "['Zuhany / Mosd\\xc3\\xb3'],\n",
    "['\\xc3\\x89tkez\\xc3\\xa9s', '\\xc3\\x89tterem/Vend\\xc3\\xa9gl\\xc5\\x91'],\n",
    "[u'Munka (irodai)', 'Munka (k\\xc3\\xa9tkezi)'],\n",
    "[u'Internet', u'Telefon/Chat/Facebook'],\n",
    "['V\\xc3\\xa1s\\xc3\\xa1rl\\xc3\\xa1s'],\n",
    "['Vall\\xc3\\xa1sgyakorl\\xc3\\xa1s'],\n",
    "[u'TV/Film', u'Mozi'],\n",
    "['Olvas\\xc3\\xa1s', '\\xc3\\x9ajs\\xc3\\xa1g/Keresztrejtv\\xc3\\xa9ny'],\n",
    "['H\\xc3\\xa1zimunka/Gyerekfel\\xc3\\xbcgyelet'],\n",
    "['Hivatalos elint\\xc3\\xa9znival\\xc3\\xb3k'],\n",
    "[u'Sport', 'Edz\\xc5\\x91terem/Sz\\xc3\\xa9ps\\xc3\\xa9gszalon'],\n",
    "['Utaz\\xc3\\xa1s/Vezet\\xc3\\xa9s'],\n",
    "['Tanul\\xc3\\xa1s',  'Mag\\xc3\\xa1n\\xc3\\xb3ra'],\n",
    "['Sz\\xc3\\xb3rakoz\\xc3\\xb3hely/K\\xc3\\xa1v\\xc3\\xa9z\\xc3\\xb3/Pub'],\n",
    "['S\\xc3\\xa9ta/Kutyas\\xc3\\xa9t\\xc3\\xa1ltat\\xc3\\xa1s', 'Term\\xc3\\xa9szet/Kir\\xc3\\xa1ndul\\xc3\\xa1s'],\n",
    "['Egy\\xc3\\xa9b Hobby', 'PC j\\xc3\\xa1t\\xc3\\xa9k','\\xc3\\x96nk\\xc3\\xa9ntess\\xc3\\xa9g', 'Kert\\xc3\\xa9szked\\xc3\\xa9s/Bark\\xc3\\xa1csol\\xc3\\xa1s', 'Rokonl\\xc3\\xa1togat\\xc3\\xa1s',  'M\\xc3\\xa1s']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#bin activities into activity groups: every label maps to the position of the group that lists it\n",
    "actidict={}\n",
    "for code,group in enumerate(activities):\n",
    "    for label in group:\n",
    "        actidict[label]=code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Olvas\\xc3\\xa1s': 8,\n",
       " 'Rokonl\\xc3\\xa1togat\\xc3\\xa1s': 16,\n",
       " u'Mozi': 7,\n",
       " 'M\\xc3\\xa1s': 16,\n",
       " u'Internet': 4,\n",
       " '\\xc3\\x89tterem/Vend\\xc3\\xa9gl\\xc5\\x91': 2,\n",
       " 'H\\xc3\\xa1zimunka/Gyerekfel\\xc3\\xbcgyelet': 9,\n",
       " 'S\\xc3\\xa9ta/Kutyas\\xc3\\xa9t\\xc3\\xa1ltat\\xc3\\xa1s': 15,\n",
       " '\\xc3\\x89tkez\\xc3\\xa9s': 2,\n",
       " 'Vall\\xc3\\xa1sgyakorl\\xc3\\xa1s': 6,\n",
       " '\\xc3\\x96nk\\xc3\\xa9ntess\\xc3\\xa9g': 16,\n",
       " '\\xc3\\x9ajs\\xc3\\xa1g/Keresztrejtv\\xc3\\xa9ny': 8,\n",
       " 'Edz\\xc5\\x91terem/Sz\\xc3\\xa9ps\\xc3\\xa9gszalon': 11,\n",
       " 'Egy\\xc3\\xa9b Hobby': 16,\n",
       " u'TV/Film': 7,\n",
       " 'Alv\\xc3\\xa1s': 0,\n",
       " 'Utaz\\xc3\\xa1s/Vezet\\xc3\\xa9s': 12,\n",
       " 'Sz\\xc3\\xb3rakoz\\xc3\\xb3hely/K\\xc3\\xa1v\\xc3\\xa9z\\xc3\\xb3/Pub': 14,\n",
       " 'Mag\\xc3\\xa1n\\xc3\\xb3ra': 13,\n",
       " 'PC j\\xc3\\xa1t\\xc3\\xa9k': 16,\n",
       " u'Sport': 11,\n",
       " 'Hivatalos elint\\xc3\\xa9znival\\xc3\\xb3k': 10,\n",
       " 'Kert\\xc3\\xa9szked\\xc3\\xa9s/Bark\\xc3\\xa1csol\\xc3\\xa1s': 16,\n",
       " u'Telefon/Chat/Facebook': 4,\n",
       " 'V\\xc3\\xa1s\\xc3\\xa1rl\\xc3\\xa1s': 5,\n",
       " u'Munka (irodai)': 3,\n",
       " 'Munka (k\\xc3\\xa9tkezi)': 3,\n",
       " 'Term\\xc3\\xa9szet/Kir\\xc3\\xa1ndul\\xc3\\xa1s': 15,\n",
       " 'Tanul\\xc3\\xa1s': 13,\n",
       " 'Zuhany / Mosd\\xc3\\xb3': 1}"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#show the lookup: activity label -> activity group code\n",
    "actidict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#promote the first row to column names and drop it from the data\n",
    "#reset_index() left the old row index in a column named 'index'; that column only exists before this step,\n",
    "#so the guard turns a re-run of this cell into a no-op instead of consuming another data row as header\n",
    "if 'index' in hkoz.columns:\n",
    "    hkoz.columns=hkoz.loc[0].values\n",
    "    hkoz=hkoz[1:].drop(0,axis=1) #the former 'index' column is now labelled with its row-0 value, 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#extract and linearize: for every row keep the labels of the columns whose value is not NaN\n",
    "hkozdata={}\n",
    "for i in hkoz.index:\n",
    "    row=hkoz.loc[i]\n",
    "    hkozdata[i]=[label for label,value in zip(row.index,row.values) if str(value).lower()!='nan']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#create timematrix - timeslice:activity list, for the single row j\n",
    "#each label is cut around its first '-': text before it (minus one character) is the activity,\n",
    "#text after it (skipping one character) is the timeslice\n",
    "j=1\n",
    "timematrix={}\n",
    "for i in hkozdata[j]:\n",
    "    dash=i.find('-')\n",
    "    timematrix.setdefault(i[dash+2:],[]).append(actidict[i[:dash-1]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#create correct timeslice order to start day at 04:00: sort the labels, then rotate the first two to the end\n",
    "sorted_slots=np.sort(timematrix.keys())\n",
    "parseorder=np.roll(sorted_slots,-2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#create output list, with shared timeslots\n",
    "output=[]\n",
    "for slot in parseorder:\n",
    "    helper=timematrix[slot]\n",
    "    np.random.shuffle(helper) #in place: the list stored in timematrix is reordered too\n",
    "    output.append(helper[:3]) #max 3 activities within 90 minutes, but create 3 randomized persons"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0], [0, 1], [13], [13], [13, 2], [13], [13], [13], [4, 16, 2], [13], [13], [7, 4, 16], [0], [0], [0], [0]]\n"
     ]
    }
   ],
   "source": [
    "#one list of activity codes per timeslice, in parse order\n",
    "print(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 283,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 90, 0, 90, 1, 45, 0, 45, 12, 90, 3, 90, 3, 90, 2, 30, 4, 30, 3, 30, 3, 90, 3, 90, 3, 90, 3, 90, 4, 45, 5, 45, 4, 30, 12, 30, 2, 30, 1, 45, 0, 45, 0, 90, 0, 90]\n"
     ]
    }
   ],
   "source": [
    "#create output CSV list: activity, duration, activity, duration, ...\n",
    "output2=[]\n",
    "fixed=90 # survey 90 min timeslices are fixed\n",
    "for slot_acts in output:\n",
    "    for act in slot_acts:\n",
    "        #activities sharing a timeslice split its minutes equally (integer division under Python 2)\n",
    "        output2.extend([act,fixed/(len(slot_acts))])\n",
    "print output2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 289,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[3, 90, 4, 45, 5, 45, 4, 30, 12, 30, 2, 30, 1, 45, 0, 45, 0, 90, 0, 90, 3, 60, 4, 45, 5, 45, 4, 30, 12, 30, 2, 30, 1, 45, 0, 45, 0, 90, 0, 90, 4, 75, 0, 90, 4, 135]\n"
     ]
    }
   ],
   "source": [
    "#merge runs of the same activity: consecutive pairs with an equal activity code become one pair with summed duration\n",
    "#single pass over the (activity, duration) pairs, so no index goes stale after a merge\n",
    "helper=[]\n",
    "for k in range(0,len(output2),2):\n",
    "    if helper and helper[-2]==output2[k]:\n",
    "        helper[-1]+=output2[k+1] #same activity as the previous pair: extend its duration\n",
    "    else:\n",
    "        helper.extend(output2[k:k+2]) #different activity: start a new (activity, duration) pair\n",
    "print helper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 90, 0, 90, 1, 45, 0, 45, 12, 90, 3, 90, 3, 90, 2, 30, 4, 30, 3, 30, 3, 90, 3, 90, 3, 90, 3, 90, 4, 45, 5, 45, 4, 30, 12, 30, 2, 30, 1, 45, 0, 45, 0, 90, 0, 90]\n"
     ]
    }
   ],
   "source": [
    "#flat list: activity, duration, activity, duration, ...\n",
    "print(output2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 286,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: [0, 1, 3, 20, 21, 22],\n",
       " 1: [2, 19],\n",
       " 2: [7, 18],\n",
       " 3: [5, 6, 9, 10, 11, 12, 13],\n",
       " 4: [8, 14, 16],\n",
       " 5: [15],\n",
       " 12: [4, 17]}"
      ]
     },
     "execution_count": 286,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#activityslice is not defined by any other cell of this notebook, so it is rebuilt here:\n",
    "#activity code -> positions of that activity among the (activity, duration) pairs of output2\n",
    "activityslice={}\n",
    "for k in range(len(output2)//2):\n",
    "    activityslice.setdefault(output2[2*k],[]).append(k)\n",
    "activityslice"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
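    "Parse all data: repeat the steps above for every row in `hkozdata`, creating 3 randomized person-instances per row."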
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 282,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#for every row of hkozdata: build its timematrix, order the timeslices, then emit 3 randomized days\n",
    "output4=[]\n",
    "fixed=90 # survey 90 min timeslices are fixed\n",
    "for j in hkozdata:\n",
    "    #create timematrix - timeslice:activity list\n",
    "    timematrix={}\n",
    "    for i in hkozdata[j]:\n",
    "        dash=i.find('-')\n",
    "        timematrix.setdefault(i[dash+2:],[]).append(actidict[i[:dash-1]])\n",
    "    #create correct timeslice order to start day at 04:00\n",
    "    parseorder=np.roll(np.sort(timematrix.keys()),-2)\n",
    "    for x in range(3): #create 3 randomized person-instances\n",
    "        #create output list, with shared timeslots\n",
    "        output=[]\n",
    "        for slot in parseorder:\n",
    "            helper=timematrix[slot]\n",
    "            np.random.shuffle(helper) #in place: the list stored in timematrix is reordered too\n",
    "            output.append(helper[:3]) #max 3 activities within 90 minutes\n",
    "        #create output CSV list: activity, duration, activity, duration, ...\n",
    "        output2=[]\n",
    "        for slot_acts in output:\n",
    "            for act in slot_acts:\n",
    "                output2.extend([act,fixed/(len(slot_acts))])\n",
    "        #store the day as comma separated text without brackets or spaces\n",
    "        output4.append(str(output2)[1:-1].replace(' ',''))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 264,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#one row per generated day string, in a single column named 'day'\n",
    "#naming the column at construction also works when output4 is empty\n",
    "savedata=pd.DataFrame(output4,columns=['day'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 265,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#write the generated days to disk without the row index\n",
    "out_name='hkoz.csv'\n",
    "savedata.to_csv(out_name,index=False)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}