{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"import pandas as pd\n",
"import os\n",
"import statsmodels.api as sm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" entity | \n",
" cpa | \n",
" closest_entity | \n",
" time | \n",
" run | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 5.476739 | \n",
" 2 | \n",
" 52.3 | \n",
" 77 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 5.476739 | \n",
" 1 | \n",
" 52.3 | \n",
" 77 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 1.919017 | \n",
" 2 | \n",
" 5.4 | \n",
" 86 | \n",
"
\n",
" \n",
" 3 | \n",
" 2 | \n",
" 1.919017 | \n",
" 1 | \n",
" 5.4 | \n",
" 86 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 4.505857 | \n",
" 2 | \n",
" 10.4 | \n",
" 9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" entity cpa closest_entity time run\n",
"0 1 5.476739 2 52.3 77\n",
"1 2 5.476739 1 52.3 77\n",
"2 1 1.919017 2 5.4 86\n",
"3 2 1.919017 1 5.4 86\n",
"4 1 4.505857 2 10.4 9"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Directory holding one '*_job_<run>' sub-directory per run of the experiment.\n",
"log_dir = os.path.join(os.path.expanduser('~'), '.scrimmage', 'experiments', 'my_first_parameter_varying')\n",
"\n",
"# glob returns paths in arbitrary order; sort so the aggregated row order is reproducible.\n",
"files = sorted(glob.glob(os.path.join(log_dir, '*_job_*', 'cpa.csv')))\n",
"if not files:\n",
"    # Fail here with a clear message instead of later with a KeyError on 'run'.\n",
"    raise FileNotFoundError('No cpa.csv files found under ' + log_dir)\n",
"\n",
"# Collect the per-run frames in a list and concatenate once; calling\n",
"# pd.concat inside the loop re-copies the accumulated data on every iteration.\n",
"frames = []\n",
"for file in files:\n",
"    # The run number is the trailing '_<n>' of the job directory name.\n",
"    run_num = int(os.path.basename(os.path.dirname(file)).split('_')[-1])\n",
"    frame = pd.read_csv(file)\n",
"    frame['run'] = run_num\n",
"    frames.append(frame)\n",
"\n",
"# ignore_index=True gives a fresh 0..n-1 index, like reset_index(drop=True).\n",
"agg = pd.concat(frames, ignore_index=True)\n",
"agg.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MS_gain | \n",
" max_speed | \n",
"
\n",
" \n",
" run | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 1.733505 | \n",
" 24.813339 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.497387 | \n",
" 22.070393 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.515396 | \n",
" 19.717730 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.053652 | \n",
" 17.737459 | \n",
"
\n",
" \n",
" 5 | \n",
" 1.957513 | \n",
" 20.121153 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MS_gain max_speed\n",
"run \n",
"1 1.733505 24.813339\n",
"2 0.497387 22.070393\n",
"3 0.515396 19.717730\n",
"4 0.053652 17.737459\n",
"5 1.957513 20.121153"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the batch parameter table from the experiment directory, using its 'run' column as the index.\n",
"params_path = os.path.join(log_dir, 'batch_params.csv')\n",
"params_agg = pd.read_csv(params_path, index_col='run')\n",
"params_agg.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" entity | \n",
" cpa | \n",
" closest_entity | \n",
" time | \n",
" run | \n",
" MS_gain | \n",
" max_speed | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 5.476739 | \n",
" 2 | \n",
" 52.3 | \n",
" 77 | \n",
" 1.805475 | \n",
" 20.063098 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 5.476739 | \n",
" 1 | \n",
" 52.3 | \n",
" 77 | \n",
" 1.805475 | \n",
" 20.063098 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 1.919017 | \n",
" 2 | \n",
" 5.4 | \n",
" 86 | \n",
" 0.336285 | \n",
" 24.769219 | \n",
"
\n",
" \n",
" 3 | \n",
" 2 | \n",
" 1.919017 | \n",
" 1 | \n",
" 5.4 | \n",
" 86 | \n",
" 0.336285 | \n",
" 24.769219 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 4.505857 | \n",
" 2 | \n",
" 10.4 | \n",
" 9 | \n",
" 1.370620 | \n",
" 16.866877 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" entity cpa closest_entity time run MS_gain max_speed\n",
"0 1 5.476739 2 52.3 77 1.805475 20.063098\n",
"1 2 5.476739 1 52.3 77 1.805475 20.063098\n",
"2 1 1.919017 2 5.4 86 0.336285 24.769219\n",
"3 2 1.919017 1 5.4 86 0.336285 24.769219\n",
"4 1 4.505857 2 10.4 9 1.370620 16.866877"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Attach each run's parameters to its result rows: a left merge of the 'run'\n",
"# column of agg against the index of params_agg (the same operation join(on='run') performs).\n",
"data = agg.merge(params_agg, how='left', left_on='run', right_index=True, suffixes=('', ''))\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" entity | \n",
" cpa | \n",
" closest_entity | \n",
" time | \n",
" run | \n",
" MS_gain | \n",
" max_speed | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 5.476739 | \n",
" 2 | \n",
" 52.3 | \n",
" 77 | \n",
" 1.805475 | \n",
" 20.063098 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 1.919017 | \n",
" 2 | \n",
" 5.4 | \n",
" 86 | \n",
" 0.336285 | \n",
" 24.769219 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 4.505857 | \n",
" 2 | \n",
" 10.4 | \n",
" 9 | \n",
" 1.370620 | \n",
" 16.866877 | \n",
"
\n",
" \n",
" 6 | \n",
" 1 | \n",
" 4.180133 | \n",
" 2 | \n",
" 23.9 | \n",
" 84 | \n",
" 1.346520 | \n",
" 22.231913 | \n",
"
\n",
" \n",
" 8 | \n",
" 1 | \n",
" 2.104974 | \n",
" 2 | \n",
" 9.2 | \n",
" 100 | \n",
" 0.669359 | \n",
" 17.816883 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" entity cpa closest_entity time run MS_gain max_speed\n",
"0 1 5.476739 2 52.3 77 1.805475 20.063098\n",
"2 1 1.919017 2 5.4 86 0.336285 24.769219\n",
"4 1 4.505857 2 10.4 9 1.370620 16.866877\n",
"6 1 4.180133 2 23.9 84 1.346520 22.231913\n",
"8 1 2.104974 2 9.2 100 0.669359 17.816883"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Select entity 1 by value rather than by row position: data[::2] only picks\n",
"# entity 1 when every run contributes exactly two rows with entity 1 first.\n",
"entity1 = data[data['entity'] == 1]\n",
"entity1.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Write the entity-1 rows into the experiment directory; the index column is labelled 'index'.\n",
"entity1.to_csv(\n",
"    path_or_buf=os.path.join(log_dir, 'entity_1_data.csv'),\n",
"    index_label='index',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | cpa | R-squared: | 0.972 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.972 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 1730. | \n",
"
\n",
"\n",
" Date: | Wed, 05 Dec 2018 | Prob (F-statistic): | 3.65e-77 | \n",
"
\n",
"\n",
" Time: | 14:49:16 | Log-Likelihood: | -95.970 | \n",
"
\n",
"\n",
" No. Observations: | 100 | AIC: | 195.9 | \n",
"
\n",
"\n",
" Df Residuals: | 98 | BIC: | 201.2 | \n",
"
\n",
"\n",
" Df Model: | 2 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" MS_gain | 2.6621 | 0.109 | 24.399 | 0.000 | 2.446 | 2.879 | \n",
"
\n",
"\n",
" max_speed | 0.0380 | 0.006 | 6.095 | 0.000 | 0.026 | 0.050 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 136.337 | Durbin-Watson: | 2.117 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 4337.281 | \n",
"
\n",
"\n",
" Skew: | 4.736 | Prob(JB): | 0.00 | \n",
"
\n",
"\n",
" Kurtosis: | 33.842 | Cond. No. | 34.6 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: cpa R-squared: 0.972\n",
"Model: OLS Adj. R-squared: 0.972\n",
"Method: Least Squares F-statistic: 1730.\n",
"Date: Wed, 05 Dec 2018 Prob (F-statistic): 3.65e-77\n",
"Time: 14:49:16 Log-Likelihood: -95.970\n",
"No. Observations: 100 AIC: 195.9\n",
"Df Residuals: 98 BIC: 201.2\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"MS_gain 2.6621 0.109 24.399 0.000 2.446 2.879\n",
"max_speed 0.0380 0.006 6.095 0.000 0.026 0.050\n",
"==============================================================================\n",
"Omnibus: 136.337 Durbin-Watson: 2.117\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 4337.281\n",
"Skew: 4.736 Prob(JB): 0.00\n",
"Kurtosis: 33.842 Cond. No. 34.6\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Basic statistical analysis: ordinary least squares of cpa on the two varied parameters.\n",
"# sm.OLS does not include an intercept by default, so add a constant column explicitly;\n",
"# without it the fit is forced through the origin and the reported R-squared is uncentered.\n",
"X = sm.add_constant(entity1[['MS_gain', 'max_speed']])\n",
"y = entity1['cpa']\n",
"\n",
"model = sm.OLS(y, X).fit()\n",
"# In-sample fitted values for the same design matrix.\n",
"predictions = model.predict(X)\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}