{
 "metadata": {
  "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" },
  "language_info": {
   "codemirror_mode": { "name": "ipython", "version": 2 },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  },
  "name": "",
  "signature": "sha256:d616bfec0be2817fb9fa7b2f85dfd85dc0bc5c1f282e1f9252eb69396236a1a4"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import matplotlib.pyplot as plt\n",
      "%matplotlib inline\n",
      "import pandas as pd\n",
      "import numpy as np\n",
      "import datetime"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Import the data" ] },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Half-open analysis window: rows with start_day <= index < end_day are loaded,\n",
      "# so 2013-11-30 itself is excluded. NOTE(review): confirm that is intended.\n",
      "start_day = datetime.datetime(2013, 11, 1)\n",
      "end_day = datetime.datetime(2013, 11, 30)\n",
      "store = pd.HDFStore('../stores/sms-call-internet-mi-table-blosc.h5')\n",
      "try:\n",
      "    # Missing values in the selected rows are replaced by 0.\n",
      "    intensity_data = store.select(\n",
      "        'telco_data',\n",
      "        \"index >= Timestamp('%s') & index < Timestamp('%s')\" % (start_day, end_day)\n",
      "    ).fillna(0)\n",
      "finally:\n",
      "    # Always release the HDF5 file handle, even when the query raises.\n",
      "    store.close()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Aggregate and symmetrize the in and out activity of cells" ] },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Sum the four directional activity columns for every (Square_id, Country_code) pair.\n",
      "df_aggregated = (\n",
      "    intensity_data\n",
      "    .groupby(['Square_id', 'Country_code'])[['SMS_in', 'SMS_out', 'Call_in', 'Call_out']]\n",
      "    .sum()\n",
      ")\n",
      "# Symmetrize: add incoming and outgoing volumes into one undirected total per channel.\n",
      "df_aggregated['SMS'] = df_aggregated.SMS_in.values + df_aggregated.SMS_out.values\n",
      "df_aggregated['Call'] = df_aggregated.Call_in.values + df_aggregated.Call_out.values"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Entropies" ] },
    {
     "cell_type": "code",
     "collapsed": false,
"input": [
      "def entropy(L):\n",
      "    \"\"\"Shannon entropy (natural log) of the counts in L after normalising them to sum to 1.\n",
      "\n",
      "    L: sequence of numeric counts (list, numpy array or pandas Series).\n",
      "    Returns 0.0 when L is empty or sums to zero; entries <= 0 are skipped.\n",
      "    \"\"\"\n",
      "    counts = np.asarray(L, dtype=float)\n",
      "    total = counts.sum()\n",
      "    if counts.size == 0 or total == 0:\n",
      "        return 0.0\n",
      "    probs = counts[counts > 0] / total\n",
      "    return float(-(probs * np.log(probs)).sum())\n",
      "\n",
      "df2 = df_aggregated.SMS.reset_index()\n",
      "# Exclude country codes 39 and 0 before measuring diversity.\n",
      "# NOTE(review): presumably 39 is domestic (Italy) traffic and 0 an unknown code - confirm.\n",
      "df2bis = df2[(df2.Country_code != 39) & (df2.Country_code != 0)]\n",
      "cells = sorted(set(df2['Square_id']))\n",
      "# One grouped pass instead of re-filtering the whole frame for every cell.\n",
      "# Cells that only appear with excluded country codes keep an entropy of 0.\n",
      "sms_entropy = df2bis.groupby('Square_id')['SMS'].apply(entropy).reindex(cells, fill_value=0)\n",
      "entropy_dict = dict(zip(cells, sms_entropy.values))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df_call = df_aggregated.Call.reset_index()\n",
      "# Same country-code exclusion as for SMS (codes 39 and 0 are dropped).\n",
      "df_call_filter = df_call[(df_call.Country_code != 39) & (df_call.Country_code != 0)]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Entropy of each cell's call volume distribution over the remaining country codes.\n",
      "call_renorm_entropy_ds = df_call_filter.groupby(df_call_filter.Square_id)['Call'].apply(entropy)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Low entropy states $S < \\mu - \\sigma$ " ] },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pickle as pk\n",
      "threshold_factor = 0.1\n",
      "call_thr = 3\n",
      "M = call_renorm_entropy_ds.max()\n",
      "for country in sorted(set(df_call_filter.Country_code)):\n",
      "    country_calls = df_call_filter[df_call_filter.Country_code == country]\n",
      "    # Call volume of this country laid out on positions 0..9999; cells without data become NaN.\n",
      "    # NOTE(review): if Square_id runs 1..10000, id 10000 is dropped and slot 0 stays empty - confirm.\n",
      "    toy_series = pd.Series(\n",
      "        data=country_calls['Call'].values,\n",
      "        index=country_calls['Square_id'].values\n",
      "    ).reindex(np.arange(10000))\n",
      "    # NOTE(review): the comparison operator was missing here (the code read `xcall_thr`);\n",
      "    # `x < call_thr` is assumed, i.e. cells below the call threshold are zeroed - confirm.\n",
      "    # NOTE(review): M is the maximum call entropy while x is a call volume - confirm M-x is intended.\n",
      "    second_toy = toy_series.fillna(0).apply(lambda x: 0 if x < call_thr else M - x)\n",
      "    # Keep second_toy as a Series so .apply/.max work; convert to an array only for the reshape.\n",
      "    cutoff = threshold_factor * second_toy.max(skipna=True)\n",
      "    deh = second_toy.apply(lambda x: 0 if x <= cutoff else 1).values.reshape(100, 100)\n",
      "    # Binary mode plus a context manager: the pickle is written portably and the file is closed.\n",
      "    with open('../stores/phom/country_high_act_low_entropy_matrices/'+str(country)+'.pck', 'wb') as out_file:\n",
      "        pk.dump(deh, out_file)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    }
   ],
   "metadata": {}
  }
 ]
}