{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from scipy import sparse as sp\n",
    "import numpy as np\n",
    "\n",
    "def spcategorical(n_cat_ids):\n",
    "    '''\n",
    "    Returns a sparse dummy matrix given an array of categorical variables.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n_cat_ids : array\n",
    "        A 1d vector of the categorical labels for n observations.\n",
    "        Inputs that squeeze to 1d (e.g. an n x 1 column) are accepted,\n",
    "        and a single label is treated as one observation.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dummy : scipy.sparse.csr_matrix\n",
    "        An n x k sparse matrix of dummy (indicator/binary) variables for\n",
    "        the categorical data, where k is the number of unique labels.\n",
    "        Columns follow the sorted order of the unique labels returned by\n",
    "        np.unique.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    IndexError\n",
    "        If n_cat_ids has more than one non-singleton dimension.\n",
    "\n",
    "    '''\n",
    "    # atleast_1d keeps a single observation (which squeezes to 0d) usable.\n",
    "    cat_ids = np.atleast_1d(np.squeeze(np.asarray(n_cat_ids)))\n",
    "    if cat_ids.ndim != 1:\n",
    "        # Wrap in a tuple so tuple inputs do not break the % formatting.\n",
    "        raise IndexError(\"The index %s is not understood\" % (n_cat_ids,))\n",
    "    n = cat_ids.shape[0]\n",
    "    # return_inverse yields each observation's column position in cat_set\n",
    "    # in one vectorized pass, replacing a per-row np.where lookup.\n",
    "    cat_set, index = np.unique(cat_ids, return_inverse=True)\n",
    "    # Exactly one nonzero per row, so row i owns data[i:i+1].\n",
    "    indptr = np.arange(n + 1, dtype=int)\n",
    "    return sp.csr_matrix((np.ones(n), index, indptr),\n",
    "                         shape=(n, len(cat_set)))\n",
    "\n",
    "# Computing the index is probably the most time-consuming step of this\n",
    "# function. If n_cat_ids is already composed of integers, and those integers\n",
    "# are the n x 1 vector of origins or destinations in OD pairs for which we\n",
    "# want to build fixed effects, then there is no need to create the index\n",
    "# variable at all: n_cat_ids can be passed directly to the csr matrix\n",
    "# constructor as the column indices, and some speed-up can be achieved (the\n",
    "# columns then follow the integer ids themselves, so this matches the output\n",
    "# above only when the ids are exactly 0..k-1). In the case where the\n",
    "# origin/destination ids are not integers but strings, a speed-up may be\n",
    "# possible by altering the algorithm so that the index is built in chunks\n",
    "# (say, per origin/destination) rather than for each row of the n x 1\n",
    "# n_cat_ids array."
] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }