{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# [Goulib](../notebook.ipynb).table\n", "\"mini pandas.DataFrame\" Table class with Excel + CSV I/O, easy access to columns, HTML output, and much more." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from Goulib.notebook import *\n", "from Goulib.table import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
123
45
\n" ], "text/plain": [ "Table(len=2,titles=[],data=[[1, 2, 3], [4, 5]])" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "small=Table(Table([[1,2,3],(4,5)])) #tables can be constructed from any tabular data\n", "small" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
complexcontent
\n", "\n", "
123
45
\n", "
$\\\\LaTeX : \\sqrt{\\left(a+b\\right)\\left(a-b\\right)}$
\n" ], "text/plain": [ "Table(len=2,titles=['complex', 'content'],data=[[Table(len=2,titles=[],data=[[1, 2, 3], [4, 5]]), '$\\\\\\\\LaTeX : \\\\sqrt{\\\\left(a+b\\\\right)\\\\left(a-b\\\\right)}$'], [Image(mode=RGB shape=(128, 128, 3) type=float64), Image(mode=RGB shape=(128, 128, 3) type=float64)]])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Table cells can contain other tables, LaTeX expressions, Images... and more (soon...)\n", "from Goulib.image import Image\n", "lena=Image('../tests/data/lena.png').resize((128,128))\n", "Table([[small,r'$\\\\LaTeX : \\sqrt{\\left(a+b\\right)\\left(a-b\\right)}$'],[lena,lena]],titles=['complex','content'])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Tables can be read from .csv, html, JSON and Excel files (requires xlrd http://www.python-excel.org/)\n", "t=Table('../tests/data/test.xls') " ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total']\n" ] } ], "source": [ "print(t.titles) #Tables have optional column headers" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
OrderDateRégionRepItemUnitésCostTotal
41061EastJonesPencil951.99 
1/23/2012CentralKivellBinder5019.99 
41154CentralJardinePencil364.99 
2/26/2012CentralGillPen2719.99 
3/15/2012WestSorvinoPencil562.99 
\n" ], "text/plain": [ "Table(len=5,titles=['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total'],data=[[41061, 'East', 'Jones', 'Pencil', 95, 1.99, None], ['1/23/2012', 'Central', 'Kivell', 'Binder', 50, 19.99, None], [41154, 'Central', 'Jardine', 'Pencil', 36, 4.99, None], ['2/26/2012', 'Central', 'Gill', 'Pen', 27, 19.99, None], ['3/15/2012', 'West', 'Sorvino', 'Pencil', 56, 2.99, None]])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t.setcol('Total',None)\n", "Table(t[:5],titles=t.titles) #indexing lines, construction and default HTML representation\n", "#notice the Order Dates are messy because of Excel representaion of dates" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(4.99, 4.99)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t[2,5],t[2,'Cost'] # cells can be accessed by row,col index or title" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1.99, 19.99, 4.99, 19.99, 2.99]\n" ] } ], "source": [ "print(t[:5,'Cost']) # indexing supports slices too" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# handle the mess in Excel Cell types ...\n", "t.to_date('OrderDate',fmt=['%m/%d/%Y','Excel']) #converts column to date using several possible formats...\n", "t.applyf('Cost',float) # apply a function to a column. Here force the column to contain floats" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# math between columns is still a bit tedious...\n", "from Goulib.math2 import vecmul\n", "t.setcol('Total',vecmul(t.col('Cost'),t.col('Unités')))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[datetime.date(2013, 12, 21),\n", " 3,\n", " 11,\n", " 5,\n", " 2121,\n", " 20.308604651162796,\n", " 19627.88000000001]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#it's easy to make a \"total\" line from columns\n", "#with a list of reduce-like functions applied to each column\n", "from Goulib.stats import avg\n", "from Goulib.itertools2 import count_unique\n", "t.total([max,count_unique,count_unique,count_unique,sum,avg,sum])\n", "t.footer #result is stored in a separated footer field" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
OrderDateRégionRepItemUnitésCostTotal
.....................
2012-01-04EastJonesBinder604.99299.40
2012-04-18CentralAndrewsPencil751.99149.25
2012-05-05CentralJardinePencil904.99449.10
2012-05-22WestThompsonPencil321.9963.68
2012-08-06EastJonesBinder608.99539.40
.....................
2013-12-213115212120.3119627.88
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "h(t.html(start=5,stop=10)) # a way to shorten long tables" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
OrderDateRégionRepItemUnitésCostTotal
2013-04-12CentralJardineBinder9419.991879.06
2012-07-29EastParentBinder8119.991619.19
2013-01-02CentralSmithBinder8715.001305.00
2012-12-29EastParentPen Set7415.991183.26
2013-10-14WestThompsonBinder5719.991139.43
.....................
2013-12-213115212120.3119627.88
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "t.sort('Total',reverse=True) # Tables can be sorted by column easily\n", "h(t.html(stop=5)) # show only the 5 lines with highest total" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
OrderDateRepItemUnitésCostTotal
2012-07-29ParentBinder8119.991619.19
2012-12-29ParentPen Set7415.991183.26
2012-10-22JonesPen648.99575.36
2012-08-06JonesBinder608.99539.40
2013-04-27HowardPen964.99479.04
2013-04-07JonesPen Set624.99309.38
2012-08-11ParentPen1519.99299.85
2012-01-04JonesBinder604.99299.40
2012-09-18JonesPen Set1615.99255.84
2012-06-01JonesPencil951.99189.05
2012-08-15JonesPencil354.99174.65
2012-12-07HowardBinder291.9957.71
2013-02-18JonesBinder44.9919.96
\n" ], "text/plain": [ "Table(len=13,titles=['OrderDate', 'Rep', 'Item', 'Unités', 'Cost', 'Total'],data=[[datetime.date(2012, 7, 29), 'Parent', 'Binder', 81, 19.99, 1619.1899999999998], [datetime.date(2012, 12, 29), 'Parent', 'Pen Set', 74, 15.99, 1183.26], [datetime.date(2012, 10, 22), 'Jones', 'Pen', 64, 8.99, 575.36], [datetime.date(2012, 8, 6), 'Jones', 'Binder', 60, 8.99, 539.4], [datetime.date(2013, 4, 27), 'Howard', 'Pen', 96, 4.99, 479.04]])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "region=t.groupby(u'Région') # dictionary of subtables grouped by a column. notice Unicode support\n", "region['East'] # isn't it nice ?" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "OrderedDict([('OrderDate', datetime.date(2013, 1, 2)),\n", " ('Région', 'Central'),\n", " ('Rep', 'Smith'),\n", " ('Item', 'Binder'),\n", " ('Unités', 87),\n", " ('Cost', 15.0),\n", " ('Total', 1305.0)])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#a row can be extracted as a dict where column titles are keys\n", "t.rowasdict(1)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'[{\"OrderDate\": \"2013-04-12\", \"R\\\\u00e9gion\": \"Central\", \"Rep\": \"Jardine\", \"Item\": \"Binder\", \"Unit\\\\u00e9s\": 94, \"Cost\": 19.99, \"Total\": 1879.06}, {\"OrderDate\": \"2013-01-02\", \"R\\\\u00e9gion\": \"Central\", \"Rep\": \"Smith\", \"Item\": \"Binder\", \"Unit\\\\u00e9s\": 87,...'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t.json()[:250]+'...' #rowasdict is handy to build json representation" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
OrderDateRégionRepItemUnitésCostTotal
2013-04-12CentralJardineBinder9419.991879.06
2013-07-08CentralKivellPen Set4223.951005.90
2012-01-23CentralKivellBinder5019.99999.50
2013-05-31CentralGillBinder808.99719.20
2012-02-26CentralGillPen2719.99539.73
2012-06-25CentralMorganPencil904.99449.10
2012-11-25CentralKivellPen Set964.99479.04
2013-06-17CentralKivellDesk5125.00625.00
2012-01-09CentralSmithDesk2125.00250.00
2012-05-10CentralMorganBinder288.99251.72
2013-07-21CentralMorganPen Set5512.49686.95
2013-01-02CentralSmithBinder8715.001305.00
2012-05-05CentralJardinePencil904.99449.10
2013-03-24CentralJardinePen Set504.99249.50
2013-01-15CentralGillBinder468.99413.54
2012-04-18CentralAndrewsPencil751.99149.25
2013-12-21CentralAndrewsBinder284.99139.72
2013-05-14CentralGillPencil531.2968.37
2012-12-12CentralSmithPencil671.2986.43
2013-10-31CentralAndrewsPencil141.2918.06
2012-09-02CentralJardinePencil364.99179.64
2013-11-17CentralJardineBinder114.9954.89
2012-08-11EastParentPen1519.99299.85
2013-10-09CentralGillPencil71.299.03
2013-04-07EastJonesPen Set624.99309.38
2013-10-04CentralAndrewsPencil661.99131.34
2012-09-18EastJonesPen Set1615.99255.84
2012-07-29EastParentBinder8119.991619.19
2013-04-27EastHowardPen964.99479.04
2012-08-06EastJonesBinder608.99539.40
2012-01-04EastJonesBinder604.99299.40
2012-10-22EastJonesPen648.99575.36
2012-06-01EastJonesPencil951.99189.05
2012-08-15EastJonesPencil354.99174.65
2012-12-07EastHowardBinder291.9957.71
2013-02-18EastJonesBinder44.9919.96
2013-10-14WestThompsonBinder5719.991139.43
2013-08-24WestSorvinoDesk3275.00825.00
2012-03-15WestSorvinoPencil562.99167.44
2013-09-27WestSorvinoPen761.99151.24
2013-07-03WestSorvinoBinder719.99139.93
2012-05-22WestThompsonPencil321.9963.68
2012-12-29EastParentPen Set7415.991183.26
2013-12-213115212120.3119627.88
\n" ], "text/plain": [ "Table(len=43,titles=['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total'],data=[[datetime.date(2013, 4, 12), 'Central', 'Jardine', 'Binder', 94, 19.99, 1879.06], [datetime.date(2013, 7, 8), 'Central', 'Kivell', 'Pen Set', 42, 23.95, 1005.9], [datetime.date(2012, 1, 23), 'Central', 'Kivell', 'Binder', 50, 19.99, 999.4999999999999], [datetime.date(2013, 5, 31), 'Central', 'Gill', 'Binder', 80, 8.99, 719.2], [datetime.date(2012, 2, 26), 'Central', 'Gill', 'Pen', 27, 19.99, 539.7299999999999]])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from Goulib.math2 import *\n", "from Goulib.itertools2 import *\n", "res=Table(t) #copy\n", "s=len(res)\n", "for i in range(s-1):\n", " line=res[i]\n", " d=[hamming(line,res[j]) for j in range(i+1,s)]\n", " j=index_min(d)[0]+i\n", " res[i+1],res[j]=res[j],res[i+1] #swap\n", "res" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hamming(t[1],t[2])" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
complexcontent
\n", "\n", "
123
45
\n", "
$\\\\LaTeX : \\sqrt{\\left(a+b\\right)\\left(a-b\\right)}$
\n" ], "text/plain": [ "Table(len=2,titles=['complex', 'content'],data=[[Table(len=2,titles=[],data=[[1, 2, 3], [4, 5]]), '$\\\\\\\\LaTeX : \\\\sqrt{\\\\left(a+b\\\\right)\\\\left(a-b\\\\right)}$'], [Image(mode=RGB shape=(128, 128, 3) type=float64), Image(mode=RGB shape=(128, 128, 3) type=float64)]])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Table([[small,r'$\\\\LaTeX : \\sqrt{\\left(a+b\\right)\\left(a-b\\right)}$'],[lena,lena]],titles=['complex','content'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }