{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# [Goulib](../notebook.ipynb).table\n",
"\"mini pandas.DataFrame\" Table class with Excel + CSV I/O, easy access to columns, HTML output, and much more."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from Goulib.notebook import *\n",
"from Goulib.table import *"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n"
],
"text/plain": [
"Table(len=2,titles=[],data=[[1, 2, 3], [4, 5]])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"small=Table(Table([[1,2,3],(4,5)])) #tables can be constructed from any tabular data\n",
"small"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"complex | content |
\n",
"\n",
"\n",
" | $\\\\LaTeX : \\sqrt{\\left(a+b\\right)\\left(a-b\\right)}$ |
\n",
" | |
\n",
"
\n"
],
"text/plain": [
"Table(len=2,titles=['complex', 'content'],data=[[Table(len=2,titles=[],data=[[1, 2, 3], [4, 5]]), '$\\\\\\\\LaTeX : \\\\sqrt{\\\\left(a+b\\\\right)\\\\left(a-b\\\\right)}$'], [Image(mode=RGB shape=(128, 128, 3) type=float64), Image(mode=RGB shape=(128, 128, 3) type=float64)]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Table cells can contain other tables, LaTeX expressions, Images... and more (soon...)\n",
"from Goulib.image import Image\n",
"lena=Image('../tests/data/lena.png').resize((128,128))\n",
"Table([[small,r'$\\\\LaTeX : \\sqrt{\\left(a+b\\right)\\left(a-b\\right)}$'],[lena,lena]],titles=['complex','content'])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Tables can be read from .csv, html, JSON and Excel files (requires xlrd http://www.python-excel.org/)\n",
"t=Table('../tests/data/test.xls') "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total']\n"
]
}
],
"source": [
"print(t.titles) #Tables have optional column headers"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OrderDate | Région | Rep | Item | Unités | Cost | Total |
\n",
"\n",
"41061 | East | Jones | Pencil | 95 | 1.99 | |
\n",
"1/23/2012 | Central | Kivell | Binder | 50 | 19.99 | |
\n",
"41154 | Central | Jardine | Pencil | 36 | 4.99 | |
\n",
"2/26/2012 | Central | Gill | Pen | 27 | 19.99 | |
\n",
"3/15/2012 | West | Sorvino | Pencil | 56 | 2.99 | |
\n",
"
\n"
],
"text/plain": [
"Table(len=5,titles=['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total'],data=[[41061, 'East', 'Jones', 'Pencil', 95, 1.99, None], ['1/23/2012', 'Central', 'Kivell', 'Binder', 50, 19.99, None], [41154, 'Central', 'Jardine', 'Pencil', 36, 4.99, None], ['2/26/2012', 'Central', 'Gill', 'Pen', 27, 19.99, None], ['3/15/2012', 'West', 'Sorvino', 'Pencil', 56, 2.99, None]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t.setcol('Total',None)\n",
"Table(t[:5],titles=t.titles) #indexing lines, construction and default HTML representation\n",
"#notice the Order Dates are messy because of Excel representaion of dates"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(4.99, 4.99)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t[2,5],t[2,'Cost'] # cells can be accessed by row,col index or title"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1.99, 19.99, 4.99, 19.99, 2.99]\n"
]
}
],
"source": [
"print(t[:5,'Cost']) # indexing supports slices too"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# handle the mess in Excel Cell types ...\n",
"t.to_date('OrderDate',fmt=['%m/%d/%Y','Excel']) #converts column to date using several possible formats...\n",
"t.applyf('Cost',float) # apply a function to a column. Here force the column to contain floats"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# math between columns is still a bit tedious...\n",
"from Goulib.math2 import vecmul\n",
"t.setcol('Total',vecmul(t.col('Cost'),t.col('Unités')))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[datetime.date(2013, 12, 21),\n",
" 3,\n",
" 11,\n",
" 5,\n",
" 2121,\n",
" 20.308604651162796,\n",
" 19627.88000000001]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#it's easy to make a \"total\" line from columns\n",
"#with a list of reduce-like functions applied to each column\n",
"from Goulib.stats import avg\n",
"from Goulib.itertools2 import count_unique\n",
"t.total([max,count_unique,count_unique,count_unique,sum,avg,sum])\n",
"t.footer #result is stored in a separated footer field"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OrderDate | Région | Rep | Item | Unités | Cost | Total |
\n",
"\n",
"... | ... | ... | ... | ... | ... | ... |
\n",
"2012-01-04 | East | Jones | Binder | 60 | 4.99 | 299.40 |
\n",
"2012-04-18 | Central | Andrews | Pencil | 75 | 1.99 | 149.25 |
\n",
"2012-05-05 | Central | Jardine | Pencil | 90 | 4.99 | 449.10 |
\n",
"2012-05-22 | West | Thompson | Pencil | 32 | 1.99 | 63.68 |
\n",
"2012-08-06 | East | Jones | Binder | 60 | 8.99 | 539.40 |
\n",
"... | ... | ... | ... | ... | ... | ... |
\n",
"\n",
"2013-12-21 | 3 | 11 | 5 | 2121 | 20.31 | 19627.88 |
\n",
"\n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"h(t.html(start=5,stop=10)) # a way to shorten long tables"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OrderDate | Région | Rep | Item | Unités | Cost | Total |
\n",
"\n",
"2013-04-12 | Central | Jardine | Binder | 94 | 19.99 | 1879.06 |
\n",
"2012-07-29 | East | Parent | Binder | 81 | 19.99 | 1619.19 |
\n",
"2013-01-02 | Central | Smith | Binder | 87 | 15.00 | 1305.00 |
\n",
"2012-12-29 | East | Parent | Pen Set | 74 | 15.99 | 1183.26 |
\n",
"2013-10-14 | West | Thompson | Binder | 57 | 19.99 | 1139.43 |
\n",
"... | ... | ... | ... | ... | ... | ... |
\n",
"\n",
"2013-12-21 | 3 | 11 | 5 | 2121 | 20.31 | 19627.88 |
\n",
"\n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"t.sort('Total',reverse=True) # Tables can be sorted by column easily\n",
"h(t.html(stop=5)) # show only the 5 lines with highest total"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OrderDate | Rep | Item | Unités | Cost | Total |
\n",
"\n",
"2012-07-29 | Parent | Binder | 81 | 19.99 | 1619.19 |
\n",
"2012-12-29 | Parent | Pen Set | 74 | 15.99 | 1183.26 |
\n",
"2012-10-22 | Jones | Pen | 64 | 8.99 | 575.36 |
\n",
"2012-08-06 | Jones | Binder | 60 | 8.99 | 539.40 |
\n",
"2013-04-27 | Howard | Pen | 96 | 4.99 | 479.04 |
\n",
"2013-04-07 | Jones | Pen Set | 62 | 4.99 | 309.38 |
\n",
"2012-08-11 | Parent | Pen | 15 | 19.99 | 299.85 |
\n",
"2012-01-04 | Jones | Binder | 60 | 4.99 | 299.40 |
\n",
"2012-09-18 | Jones | Pen Set | 16 | 15.99 | 255.84 |
\n",
"2012-06-01 | Jones | Pencil | 95 | 1.99 | 189.05 |
\n",
"2012-08-15 | Jones | Pencil | 35 | 4.99 | 174.65 |
\n",
"2012-12-07 | Howard | Binder | 29 | 1.99 | 57.71 |
\n",
"2013-02-18 | Jones | Binder | 4 | 4.99 | 19.96 |
\n",
"
\n"
],
"text/plain": [
"Table(len=13,titles=['OrderDate', 'Rep', 'Item', 'Unités', 'Cost', 'Total'],data=[[datetime.date(2012, 7, 29), 'Parent', 'Binder', 81, 19.99, 1619.1899999999998], [datetime.date(2012, 12, 29), 'Parent', 'Pen Set', 74, 15.99, 1183.26], [datetime.date(2012, 10, 22), 'Jones', 'Pen', 64, 8.99, 575.36], [datetime.date(2012, 8, 6), 'Jones', 'Binder', 60, 8.99, 539.4], [datetime.date(2013, 4, 27), 'Howard', 'Pen', 96, 4.99, 479.04]])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"region=t.groupby(u'Région') # dictionary of subtables grouped by a column. notice Unicode support\n",
"region['East'] # isn't it nice ?"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"OrderedDict([('OrderDate', datetime.date(2013, 1, 2)),\n",
" ('Région', 'Central'),\n",
" ('Rep', 'Smith'),\n",
" ('Item', 'Binder'),\n",
" ('Unités', 87),\n",
" ('Cost', 15.0),\n",
" ('Total', 1305.0)])"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#a row can be extracted as a dict where column titles are keys\n",
"t.rowasdict(1)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'[{\"OrderDate\": \"2013-04-12\", \"R\\\\u00e9gion\": \"Central\", \"Rep\": \"Jardine\", \"Item\": \"Binder\", \"Unit\\\\u00e9s\": 94, \"Cost\": 19.99, \"Total\": 1879.06}, {\"OrderDate\": \"2013-01-02\", \"R\\\\u00e9gion\": \"Central\", \"Rep\": \"Smith\", \"Item\": \"Binder\", \"Unit\\\\u00e9s\": 87,...'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t.json()[:250]+'...' #rowasdict is handy to build json representation"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OrderDate | Région | Rep | Item | Unités | Cost | Total |
\n",
"\n",
"2013-04-12 | Central | Jardine | Binder | 94 | 19.99 | 1879.06 |
\n",
"2013-07-08 | Central | Kivell | Pen Set | 42 | 23.95 | 1005.90 |
\n",
"2012-01-23 | Central | Kivell | Binder | 50 | 19.99 | 999.50 |
\n",
"2013-05-31 | Central | Gill | Binder | 80 | 8.99 | 719.20 |
\n",
"2012-02-26 | Central | Gill | Pen | 27 | 19.99 | 539.73 |
\n",
"2012-06-25 | Central | Morgan | Pencil | 90 | 4.99 | 449.10 |
\n",
"2012-11-25 | Central | Kivell | Pen Set | 96 | 4.99 | 479.04 |
\n",
"2013-06-17 | Central | Kivell | Desk | 5 | 125.00 | 625.00 |
\n",
"2012-01-09 | Central | Smith | Desk | 2 | 125.00 | 250.00 |
\n",
"2012-05-10 | Central | Morgan | Binder | 28 | 8.99 | 251.72 |
\n",
"2013-07-21 | Central | Morgan | Pen Set | 55 | 12.49 | 686.95 |
\n",
"2013-01-02 | Central | Smith | Binder | 87 | 15.00 | 1305.00 |
\n",
"2012-05-05 | Central | Jardine | Pencil | 90 | 4.99 | 449.10 |
\n",
"2013-03-24 | Central | Jardine | Pen Set | 50 | 4.99 | 249.50 |
\n",
"2013-01-15 | Central | Gill | Binder | 46 | 8.99 | 413.54 |
\n",
"2012-04-18 | Central | Andrews | Pencil | 75 | 1.99 | 149.25 |
\n",
"2013-12-21 | Central | Andrews | Binder | 28 | 4.99 | 139.72 |
\n",
"2013-05-14 | Central | Gill | Pencil | 53 | 1.29 | 68.37 |
\n",
"2012-12-12 | Central | Smith | Pencil | 67 | 1.29 | 86.43 |
\n",
"2013-10-31 | Central | Andrews | Pencil | 14 | 1.29 | 18.06 |
\n",
"2012-09-02 | Central | Jardine | Pencil | 36 | 4.99 | 179.64 |
\n",
"2013-11-17 | Central | Jardine | Binder | 11 | 4.99 | 54.89 |
\n",
"2012-08-11 | East | Parent | Pen | 15 | 19.99 | 299.85 |
\n",
"2013-10-09 | Central | Gill | Pencil | 7 | 1.29 | 9.03 |
\n",
"2013-04-07 | East | Jones | Pen Set | 62 | 4.99 | 309.38 |
\n",
"2013-10-04 | Central | Andrews | Pencil | 66 | 1.99 | 131.34 |
\n",
"2012-09-18 | East | Jones | Pen Set | 16 | 15.99 | 255.84 |
\n",
"2012-07-29 | East | Parent | Binder | 81 | 19.99 | 1619.19 |
\n",
"2013-04-27 | East | Howard | Pen | 96 | 4.99 | 479.04 |
\n",
"2012-08-06 | East | Jones | Binder | 60 | 8.99 | 539.40 |
\n",
"2012-01-04 | East | Jones | Binder | 60 | 4.99 | 299.40 |
\n",
"2012-10-22 | East | Jones | Pen | 64 | 8.99 | 575.36 |
\n",
"2012-06-01 | East | Jones | Pencil | 95 | 1.99 | 189.05 |
\n",
"2012-08-15 | East | Jones | Pencil | 35 | 4.99 | 174.65 |
\n",
"2012-12-07 | East | Howard | Binder | 29 | 1.99 | 57.71 |
\n",
"2013-02-18 | East | Jones | Binder | 4 | 4.99 | 19.96 |
\n",
"2013-10-14 | West | Thompson | Binder | 57 | 19.99 | 1139.43 |
\n",
"2013-08-24 | West | Sorvino | Desk | 3 | 275.00 | 825.00 |
\n",
"2012-03-15 | West | Sorvino | Pencil | 56 | 2.99 | 167.44 |
\n",
"2013-09-27 | West | Sorvino | Pen | 76 | 1.99 | 151.24 |
\n",
"2013-07-03 | West | Sorvino | Binder | 7 | 19.99 | 139.93 |
\n",
"2012-05-22 | West | Thompson | Pencil | 32 | 1.99 | 63.68 |
\n",
"2012-12-29 | East | Parent | Pen Set | 74 | 15.99 | 1183.26 |
\n",
"\n",
"2013-12-21 | 3 | 11 | 5 | 2121 | 20.31 | 19627.88 |
\n",
"\n",
"
\n"
],
"text/plain": [
"Table(len=43,titles=['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total'],data=[[datetime.date(2013, 4, 12), 'Central', 'Jardine', 'Binder', 94, 19.99, 1879.06], [datetime.date(2013, 7, 8), 'Central', 'Kivell', 'Pen Set', 42, 23.95, 1005.9], [datetime.date(2012, 1, 23), 'Central', 'Kivell', 'Binder', 50, 19.99, 999.4999999999999], [datetime.date(2013, 5, 31), 'Central', 'Gill', 'Binder', 80, 8.99, 719.2], [datetime.date(2012, 2, 26), 'Central', 'Gill', 'Pen', 27, 19.99, 539.7299999999999]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from Goulib.math2 import *\n",
"from Goulib.itertools2 import *\n",
"res=Table(t) #copy\n",
"s=len(res)\n",
"for i in range(s-1):\n",
" line=res[i]\n",
" d=[hamming(line,res[j]) for j in range(i+1,s)]\n",
" j=index_min(d)[0]+i\n",
" res[i+1],res[j]=res[j],res[i+1] #swap\n",
"res"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hamming(t[1],t[2])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"complex | content |
\n",
"\n",
"\n",
" | $\\\\LaTeX : \\sqrt{\\left(a+b\\right)\\left(a-b\\right)}$ |
\n",
" | |
\n",
"
\n"
],
"text/plain": [
"Table(len=2,titles=['complex', 'content'],data=[[Table(len=2,titles=[],data=[[1, 2, 3], [4, 5]]), '$\\\\\\\\LaTeX : \\\\sqrt{\\\\left(a+b\\\\right)\\\\left(a-b\\\\right)}$'], [Image(mode=RGB shape=(128, 128, 3) type=float64), Image(mode=RGB shape=(128, 128, 3) type=float64)]])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Table([[small,r'$\\\\LaTeX : \\sqrt{\\left(a+b\\right)\\left(a-b\\right)}$'],[lena,lena]],titles=['complex','content'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}