{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Retain in Pandas\n",
"## Similar question: \n",
"https://stackoverflow.com/questions/33569265/retain-function-in-python\n",
"\n",
"### Any better way to do this than using a for loop or apply?"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Date | \n",
" Quantity | \n",
" Price | \n",
" Total Shares | \n",
" Cost | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2017-01-10 | \n",
" 1000 | \n",
" 117.2249 | \n",
" 1000 | \n",
" 117224.90 | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-02-10 | \n",
" -500 | \n",
" 130.5928 | \n",
" 500 | \n",
" -65296.40 | \n",
"
\n",
" \n",
" 2 | \n",
" 2017-03-10 | \n",
" 1500 | \n",
" 137.5316 | \n",
" 2000 | \n",
" 206297.40 | \n",
"
\n",
" \n",
" 3 | \n",
" 2017-04-10 | \n",
" -2000 | \n",
" 141.5150 | \n",
" 0 | \n",
" -283030.00 | \n",
"
\n",
" \n",
" 4 | \n",
" 2017-05-10 | \n",
" 500 | \n",
" 151.4884 | \n",
" 500 | \n",
" 75744.20 | \n",
"
\n",
" \n",
" 5 | \n",
" 2017-06-09 | \n",
" 500 | \n",
" 147.8657 | \n",
" 1000 | \n",
" 73932.85 | \n",
"
\n",
" \n",
" 6 | \n",
" 2017-07-10 | \n",
" 500 | \n",
" 143.9750 | \n",
" 1500 | \n",
" 71987.50 | \n",
"
\n",
" \n",
" 7 | \n",
" 2017-08-10 | \n",
" -1000 | \n",
" 154.7636 | \n",
" 500 | \n",
" -154763.60 | \n",
"
\n",
" \n",
" 8 | \n",
" 2017-09-11 | \n",
" -200 | \n",
" 160.9215 | \n",
" 300 | \n",
" -32184.30 | \n",
"
\n",
" \n",
" 9 | \n",
" 2017-10-10 | \n",
" 1000 | \n",
" 155.3416 | \n",
" 1300 | \n",
" 155341.60 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Date Quantity Price Total Shares Cost\n",
"0 2017-01-10 1000 117.2249 1000 117224.90\n",
"1 2017-02-10 -500 130.5928 500 -65296.40\n",
"2 2017-03-10 1500 137.5316 2000 206297.40\n",
"3 2017-04-10 -2000 141.5150 0 -283030.00\n",
"4 2017-05-10 500 151.4884 500 75744.20\n",
"5 2017-06-09 500 147.8657 1000 73932.85\n",
"6 2017-07-10 500 143.9750 1500 71987.50\n",
"7 2017-08-10 -1000 154.7636 500 -154763.60\n",
"8 2017-09-11 -200 160.9215 300 -32184.30\n",
"9 2017-10-10 1000 155.3416 1300 155341.60"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aapl = pd.read_csv('AAPL_exmaple.csv', parse_dates=['Date'])\n",
"aapl['Total Shares'] = aapl['Quantity'].cumsum()\n",
"aapl['Cost'] = aapl['Quantity']*aapl['Price']\n",
"aapl"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Date | \n",
" Quantity | \n",
" Price | \n",
" Total Shares | \n",
" Cost | \n",
" Ave Cost | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2017-01-10 | \n",
" 1000 | \n",
" 117.2249 | \n",
" 1000 | \n",
" 117224.90 | \n",
" 117.224900 | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-02-10 | \n",
" -500 | \n",
" 130.5928 | \n",
" 500 | \n",
" -65296.40 | \n",
" 117.224900 | \n",
"
\n",
" \n",
" 2 | \n",
" 2017-03-10 | \n",
" 1500 | \n",
" 137.5316 | \n",
" 2000 | \n",
" 206297.40 | \n",
" 132.454925 | \n",
"
\n",
" \n",
" 3 | \n",
" 2017-04-10 | \n",
" -2000 | \n",
" 141.5150 | \n",
" 0 | \n",
" -283030.00 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" 2017-05-10 | \n",
" 500 | \n",
" 151.4884 | \n",
" 500 | \n",
" 75744.20 | \n",
" 151.488400 | \n",
"
\n",
" \n",
" 5 | \n",
" 2017-06-09 | \n",
" 500 | \n",
" 147.8657 | \n",
" 1000 | \n",
" 73932.85 | \n",
" 149.677050 | \n",
"
\n",
" \n",
" 6 | \n",
" 2017-07-10 | \n",
" 500 | \n",
" 143.9750 | \n",
" 1500 | \n",
" 71987.50 | \n",
" 147.776367 | \n",
"
\n",
" \n",
" 7 | \n",
" 2017-08-10 | \n",
" -1000 | \n",
" 154.7636 | \n",
" 500 | \n",
" -154763.60 | \n",
" 147.776367 | \n",
"
\n",
" \n",
" 8 | \n",
" 2017-09-11 | \n",
" -200 | \n",
" 160.9215 | \n",
" 300 | \n",
" -32184.30 | \n",
" 147.776367 | \n",
"
\n",
" \n",
" 9 | \n",
" 2017-10-10 | \n",
" 1000 | \n",
" 155.3416 | \n",
" 1300 | \n",
" 155341.60 | \n",
" 153.595777 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Date Quantity Price Total Shares Cost Ave Cost\n",
"0 2017-01-10 1000 117.2249 1000 117224.90 117.224900\n",
"1 2017-02-10 -500 130.5928 500 -65296.40 117.224900\n",
"2 2017-03-10 1500 137.5316 2000 206297.40 132.454925\n",
"3 2017-04-10 -2000 141.5150 0 -283030.00 0.000000\n",
"4 2017-05-10 500 151.4884 500 75744.20 151.488400\n",
"5 2017-06-09 500 147.8657 1000 73932.85 149.677050\n",
"6 2017-07-10 500 143.9750 1500 71987.50 147.776367\n",
"7 2017-08-10 -1000 154.7636 500 -154763.60 147.776367\n",
"8 2017-09-11 -200 160.9215 300 -32184.30 147.776367\n",
"9 2017-10-10 1000 155.3416 1300 155341.60 153.595777"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def get_ave_cost(df):\n",
" for index, row in df.iterrows():\n",
" if index == 0:\n",
" df.loc[index,'Ave Cost'] = row['Price']\n",
" elif row['Total Shares'] == 0:\n",
" df.loc[index,'Ave Cost'] = 0.0\n",
" else:\n",
" if row['Quantity'] > 0:\n",
" df.loc[index,'Ave Cost'] = ((df.loc[index - 1,'Ave Cost'] * df.loc[index - 1,'Total Shares']) \\\n",
" + row['Cost'])/row['Total Shares']\n",
" else:\n",
" df.loc[index,'Ave Cost'] = df.loc[index - 1,'Ave Cost']\n",
" return df\n",
"\n",
"aapl_ave_cost = get_ave_cost(aapl)\n",
"aapl_ave_cost"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 loops, best of 3: 4.81 ms per loop\n"
]
}
],
"source": [
"%timeit get_ave_cost(aapl)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The root problem is using for loops instead of a vectorized solution."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}