{ "cells": [ { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from IPython import display\n", "\n", "from collections import namedtuple" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Dynamic programming solves a problem by breaking it up into smaller sub-problems, looks up the answer or solves and saves results and then combines the answers together to solve the original problem. \n", "\n", "Key question:\n", "- What are the sub problems?\n", "\n", "A simple example:\n", "\n", "# Fib numbers using dynamic programming" ] }, { "cell_type": "code", "execution_count": 153, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0]" ] }, "execution_count": 153, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def fib_recursion(n, table=[]):\n", " \"\"\"returns fib of n\"\"\"\n", " \n", " if n <= 1:\n", " return n\n", " \n", " if len(table) < 1:\n", " table = np.zeros(n+1)\n", " if table[n-1] == 0:\n", " table[n-1] = fib_recursion(n-1, table)\n", " if table[n-2] == 0:\n", " table[n-2] = fib_recursion(n-2, table)\n", " \n", " table[n] = table[n-1] + table[n-2]\n", " \n", " return table[n]\n", "\n", "[fib_recursion(n) for n in range(10)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now what if we use a list:'" ] }, { "cell_type": "code", "execution_count": 150, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 1, 2, 3, 5, 8, 13, 21, 34]" ] }, "execution_count": 150, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def fib_list(n):\n", " table=[0,1]\n", " for i in range(2, n+1):\n", " table.append(table[i-1]+table[i-2])\n", " return table[n]\n", " \n", "[fib_list(n) for n in range(10)] " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now there isn't any point in saving all the fib numbers, so modifying the code to save only the last two fib numbers:" ] }, { "cell_type": "code", "execution_count": 159, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[1, 1, 1, 2, 3, 5, 8, 13, 21, 34]" ] }, "execution_count": 159, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def fib_list_two(n):\n", " table=[0,1]\n", " \n", " for i in range(2, n+1):\n", " table.append(sum(table[-2:]))\n", " del table[0] \n", " return table[1]\n", " \n", "[fib_list_two(n) for n in range(10)]" ] }, { "cell_type": "code", "execution_count": 167, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", "Wall time: 455 µs\n" ] }, { "data": { "text/plain": [ "3.54224848179262e+20" ] }, "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%time fib_recursion(100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The recursion fails as exceeds max memory depth on pretty small fib numbers" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", "Wall time: 525 µs\n" ] }, { "data": { "text/plain": [ "43466557686937456435688527675040625802564660517371780402481729089536555417949051890403879840079255169295922593080322634775209689623239873322471161642996440906533187938298969649928516003704476137795166849228875" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%time fib_list(1000)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now this holds all the fib numbers up to n in memory in a list. Which for large n seems un necessary to waste all that memory. " ] }, { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", "Wall time: 1.17 ms\n" ] }, { "data": { "text/plain": [ "43466557686937456435688527675040625802564660517371780402481729089536555417949051890403879840079255169295922593080322634775209689623239873322471161642996440906533187938298969649928516003704476137795166849228875" ] }, "execution_count": 163, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%time fib_list_two(1000)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Knapsack problem" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "income_url=\"http://docs.google.com/spreadsheet/pub?key=0AkBd6lyS3EmpdHo5S0J6ekhVOF9QaVhod05QSGV4T3c&output=xlsx\"\n", "life_url=\"http://docs.google.com/spreadsheet/pub?key=phAwcNAVuyj2tPLxKvvnNPA&output=xlsx\"\n", "pop_url=\"http://docs.google.com/spreadsheet/pub?key=phAwcNAVuyj0XOoBL_n5tAQ&output=xlsx\"" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "income = pd.read_excel(income_url)\n", "life = pd.read_excel(life_url)\n", "pop = pd.read_excel(pop_url)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Income per person (fixed 2000 US$)196019611962196319641965196619671968...2002200320042005200620072008200920102011
0AbkhaziaNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1AfghanistanNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2Akrotiri and DhekeliaNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3AlbaniaNaNNaNNaNNaNNaNNaNNaNNaNNaN...1313.7227251381.0408321454.0228541525.7235891594.4950671681.6139101804.4194151857.3529471915.4244591965.707230
4Algeria1280.3848281085.414612855.9479861128.415781170.3238961215.0157831127.6142881200.5582251291.863983...1871.9219861971.5128032043.1357132115.1860282124.9577542155.4852312173.7879032192.7039762231.9802462255.225482
\n", "

5 rows × 53 columns

\n", "
" ], "text/plain": [ " Income per person (fixed 2000 US$) 1960 1961 1962 \\\n", "0 Abkhazia NaN NaN NaN \n", "1 Afghanistan NaN NaN NaN \n", "2 Akrotiri and Dhekelia NaN NaN NaN \n", "3 Albania NaN NaN NaN \n", "4 Algeria 1280.384828 1085.414612 855.947986 \n", "\n", " 1963 1964 1965 1966 1967 \\\n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN \n", "4 1128.41578 1170.323896 1215.015783 1127.614288 1200.558225 \n", "\n", " 1968 ... 2002 2003 2004 \\\n", "0 NaN ... NaN NaN NaN \n", "1 NaN ... NaN NaN NaN \n", "2 NaN ... NaN NaN NaN \n", "3 NaN ... 1313.722725 1381.040832 1454.022854 \n", "4 1291.863983 ... 1871.921986 1971.512803 2043.135713 \n", "\n", " 2005 2006 2007 2008 2009 \\\n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN NaN \n", "3 1525.723589 1594.495067 1681.613910 1804.419415 1857.352947 \n", "4 2115.186028 2124.957754 2155.485231 2173.787903 2192.703976 \n", "\n", " 2010 2011 \n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 1915.424459 1965.707230 \n", "4 2231.980246 2255.225482 \n", "\n", "[5 rows x 53 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "income.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Life expectancy180018011802180318041805180618071808...2007200820092010201120122013201420152016
0AbkhaziaNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1Afghanistan28.2128.2028.1928.1828.1728.1628.1528.1428.13...52.452.853.353.654.054.454.854.953.852.72
2Akrotiri and DhekeliaNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3Albania35.4035.4035.4035.4035.4035.4035.4035.4035.40...76.676.877.077.277.477.577.777.978.078.10
4Algeria28.8228.8228.8228.8228.8228.8228.8228.8228.82...75.375.575.776.076.176.276.376.376.476.50
\n", "

5 rows × 218 columns

\n", "
" ], "text/plain": [ " Life expectancy 1800 1801 1802 1803 1804 1805 1806 \\\n", "0 Abkhazia NaN NaN NaN NaN NaN NaN NaN \n", "1 Afghanistan 28.21 28.20 28.19 28.18 28.17 28.16 28.15 \n", "2 Akrotiri and Dhekelia NaN NaN NaN NaN NaN NaN NaN \n", "3 Albania 35.40 35.40 35.40 35.40 35.40 35.40 35.40 \n", "4 Algeria 28.82 28.82 28.82 28.82 28.82 28.82 28.82 \n", "\n", " 1807 1808 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 \\\n", "0 NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1 28.14 28.13 ... 52.4 52.8 53.3 53.6 54.0 54.4 54.8 54.9 53.8 \n", "2 NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "3 35.40 35.40 ... 76.6 76.8 77.0 77.2 77.4 77.5 77.7 77.9 78.0 \n", "4 28.82 28.82 ... 75.3 75.5 75.7 76.0 76.1 76.2 76.3 76.3 76.4 \n", "\n", " 2016 \n", "0 NaN \n", "1 52.72 \n", "2 NaN \n", "3 78.10 \n", "4 76.50 \n", "\n", "[5 rows x 218 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "life.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Total population180018101820183018401850186018701880...2006200720082009201020112012201320142015
0AbkhaziaNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1Afghanistan3280000.03280000.03323519.03448982.03625022.03810047.03973968.04169690.04419695.0...25183615.025877544.026528741.027207291.027962207.028809167.029726803.030682500.031627506.032526562.0
2Akrotiri and DhekeliaNaNNaNNaNNaNNaNNaNNaNNaNNaN...15700.015700.015700.0NaNNaNNaNNaNNaNNaNNaN
3Albania410445.0423591.0438671.0457234.0478227.0506889.0552800.0610036.0672544.0...3050741.03010849.02968026.02929886.02901883.02886010.02880667.02883281.02889676.02896679.0
4Algeria2503218.02595056.02713079.02880355.03082721.03299305.03536468.03811028.04143163.0...33749328.034261971.034811059.035401790.036036159.036717132.037439427.038186135.038934334.039666519.0
\n", "

5 rows × 82 columns

\n", "
" ], "text/plain": [ " Total population 1800 1810 1820 1830 \\\n", "0 Abkhazia NaN NaN NaN NaN \n", "1 Afghanistan 3280000.0 3280000.0 3323519.0 3448982.0 \n", "2 Akrotiri and Dhekelia NaN NaN NaN NaN \n", "3 Albania 410445.0 423591.0 438671.0 457234.0 \n", "4 Algeria 2503218.0 2595056.0 2713079.0 2880355.0 \n", "\n", " 1840 1850 1860 1870 1880 ... \\\n", "0 NaN NaN NaN NaN NaN ... \n", "1 3625022.0 3810047.0 3973968.0 4169690.0 4419695.0 ... \n", "2 NaN NaN NaN NaN NaN ... \n", "3 478227.0 506889.0 552800.0 610036.0 672544.0 ... \n", "4 3082721.0 3299305.0 3536468.0 3811028.0 4143163.0 ... \n", "\n", " 2006 2007 2008 2009 2010 2011 \\\n", "0 NaN NaN NaN NaN NaN NaN \n", "1 25183615.0 25877544.0 26528741.0 27207291.0 27962207.0 28809167.0 \n", "2 15700.0 15700.0 15700.0 NaN NaN NaN \n", "3 3050741.0 3010849.0 2968026.0 2929886.0 2901883.0 2886010.0 \n", "4 33749328.0 34261971.0 34811059.0 35401790.0 36036159.0 36717132.0 \n", "\n", " 2012 2013 2014 2015 \n", "0 NaN NaN NaN NaN \n", "1 29726803.0 30682500.0 31627506.0 32526562.0 \n", "2 NaN NaN NaN NaN \n", "3 2880667.0 2883281.0 2889676.0 2896679.0 \n", "4 37439427.0 38186135.0 38934334.0 39666519.0 \n", "\n", "[5 rows x 82 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The most recent year where we have info on all three is 2011, so lets look at 2011 data:" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Total populationPopulation
0AbkhaziaNaN
1Afghanistan28809167.0
2Akrotiri and DhekeliaNaN
3Albania2886010.0
4Algeria36717132.0
\n", "
" ], "text/plain": [ " Total population Population\n", "0 Abkhazia NaN\n", "1 Afghanistan 28809167.0\n", "2 Akrotiri and Dhekelia NaN\n", "3 Albania 2886010.0\n", "4 Algeria 36717132.0" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pop[[\"Total population\", 2011]].copy()\n", "df.rename(columns={2011: \"Population\"}, inplace=True)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Total populationPopulationLife
0AbkhaziaNaNNaN
1Afghanistan28809167.054.0
2Akrotiri and DhekeliaNaNNaN
3Albania2886010.077.4
4Algeria36717132.076.1
\n", "
" ], "text/plain": [ " Total population Population Life\n", "0 Abkhazia NaN NaN\n", "1 Afghanistan 28809167.0 54.0\n", "2 Akrotiri and Dhekelia NaN NaN\n", "3 Albania 2886010.0 77.4\n", "4 Algeria 36717132.0 76.1" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.join(life[2011])\n", "df.rename(columns={2011: \"Life\"}, inplace=True)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Total populationPopulationLifeIncome
0AbkhaziaNaNNaNNaN
1Afghanistan28809167.054.0NaN
2Akrotiri and DhekeliaNaNNaNNaN
3Albania2886010.077.41965.707230
4Algeria36717132.076.12255.225482
\n", "
" ], "text/plain": [ " Total population Population Life Income\n", "0 Abkhazia NaN NaN NaN\n", "1 Afghanistan 28809167.0 54.0 NaN\n", "2 Akrotiri and Dhekelia NaN NaN NaN\n", "3 Albania 2886010.0 77.4 1965.707230\n", "4 Algeria 36717132.0 76.1 2255.225482" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.join(income[\"2011\"])\n", "df.rename(columns={\"2011\": \"Income\"}, inplace=True)\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So now we have a dataframe containing country names, their population, adjusted per capita income and life expectance.\n", "\n", "To simplify things, I'm going to drop the missing values:" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Total populationPopulationLifeIncome
3Albania2886010.077.41965.707230
4Algeria36717132.076.12255.225482
7Angola21942296.058.1629.955306
9Antigua and Barbuda88152.075.99977.957073
10Argentina41655616.076.011601.630223
\n", "
" ], "text/plain": [ " Total population Population Life Income\n", "3 Albania 2886010.0 77.4 1965.707230\n", "4 Algeria 36717132.0 76.1 2255.225482\n", "7 Angola 21942296.0 58.1 629.955306\n", "9 Antigua and Barbuda 88152.0 75.9 9977.957073\n", "10 Argentina 41655616.0 76.0 11601.630223" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dropna(inplace=True)\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Lets say we want 500 million people with the highest income. So the population becomes the \"weights\", and the income the \"values\" of this knapsack problem." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So lets divide this problem into sizes of 10M, so we go 10, 20, 30... all the way to 500M\n", "\n", "first, starting with 10 to 50M to keep it simple, heres our table:" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
1020304050
Total population
Albania00000
Algeria00000
Angola00000
Antigua and Barbuda00000
Argentina00000
\n", "
" ], "text/plain": [ " 10 20 30 40 50\n", "Total population \n", "Albania 0 0 0 0 0\n", "Algeria 0 0 0 0 0\n", "Angola 0 0 0 0 0\n", "Antigua and Barbuda 0 0 0 0 0\n", "Argentina 0 0 0 0 0" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "solved = pd.DataFrame(columns=np.arange(10,51,10), index=df[\"Total population\"])\n", "solved.fillna(value=0, inplace=True)\n", "solved.head()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(100, 200, ['countryA', 'B'])" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ans = {}\n", "ans[10] = 100, 200, [\"countryA\", \"B\"]\n", "ans[10]" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total population Antigua and Barbuda\n", "Population 88152\n", "Life 75.9\n", "Income 9977.96\n", "Name: 9, dtype: object\n", "Total population Dominica\n", "Population 71402\n", "Life 73.4\n", "Income 6518.66\n", "Name: 61, dtype: object\n", "Total population Marshall Islands\n", "Population 52541\n", "Life 66\n", "Income 2522.82\n", "Name: 139, dtype: object\n", "Total population Seychelles\n", "Population 93810\n", "Life 73.4\n", "Income 9279.11\n", "Name: 202, dtype: object\n" ] }, { "data": { "text/plain": [ "1000000000" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def pop_income(pop_size):\n", " \"\"\"takes in a pop_size, and returns a tuple containing\n", " max income and pop and a list of countries chosen\"\"\"\n", " if pop_size in solved.keys():\n", " return solved[max]\n", " pop = 0\n", " income = 0\n", " \n", " for i, row in df[df[\"Population\"]<= pop_size].iterrows():\n", " print(row)\n", "\n", "pop_income(10*np.power(10,4))\n", "np.power(10,9) " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }