{ "metadata": { "name": "", "signature": "sha256:afa3fa52acc2e4f4c7f68fdccddfd7dbe5a9beed2b1e103a25095d2865b3519b" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "%pylab inline\n", "\n", "import pandas as pd" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "/home/walrus/.virtualenvs/py/local/lib/python2.7/site-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.\n", " .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "data = pd.read_excel('../Orcamento/2015/Proposta/PLOA467BaseDadosQuadroDetalhadoDaAcao.xls')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "def get_data_info(data):\n", " data_info = {}\n", " \n", " for c in data.columns:\n", " if data[c].dtype in [np.dtype('int64'), np.dtype('float64')]:\n", " data_info[c] = {}\n", " data_info[c]['distinct_values'] = len(data[c].value_counts())\n", " data_info[c]['range'] = (data[c].min(), data[c].max())\n", " \n", " return data_info" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 61 }, { "cell_type": "code", "collapsed": false, "input": [ "data_info = get_data_info(data)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 64 }, { "cell_type": "code", "collapsed": false, "input": [ "data_info" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 65, "text": [ "{u'ANO_EX': {'distinct_values': 1, 'range': (2015, 2015)},\n", " u'COD_DA': {'distinct_values': 73, 'range': (1, 100)},\n", " u'COD_DISTRITO': {'distinct_values': 68, 'range': (0, 99)},\n", " u'COD_EMP': {'distinct_values': 14, 'range': (1, 91)},\n", " u'COD_META': {'distinct_values': 321, 'range': (1, 999)},\n", " u'COD_REGIAO': {'distinct_values': 7, 'range': (0, 9)},\n", " u'COD_SUBPREFEITURA': {'distinct_values': 34, 'range': (0, 99)},\n", " u'ORGAO': {'distinct_values': 86, 'range': (1, 99)},\n", " u'PA': {'distinct_values': 313, 'range': (4, 9133)},\n", " u'UNIDADE': {'distinct_values': 24, 'range': (10, 70)},\n", " u'VALOR_DA': {'distinct_values': 1071, 'range': (1000.0, 5488238392.0)}}" ] } ], "prompt_number": 65 } ], "metadata": {} } ] }