{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" Tipster | \n",
" Date | \n",
" Track | \n",
" Horse | \n",
" Bet Type | \n",
" Odds | \n",
" Result | \n",
" TipsterActive | \n",
"
\n",
" \n",
" UID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 1 | \n",
" Tipster A | \n",
" 24/07/2015 | \n",
" Ascot | \n",
" Fredricka | \n",
" Win | \n",
" 8.00 | \n",
" Lose | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" Tipster A | \n",
" 24/07/2015 | \n",
" Thirsk | \n",
" Spend A Penny | \n",
" Win | \n",
" 4.50 | \n",
" Lose | \n",
" True | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" Tipster A | \n",
" 24/07/2015 | \n",
" York | \n",
" Straightothepoint | \n",
" Win | \n",
" 7.00 | \n",
" Lose | \n",
" True | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" Tipster A | \n",
" 24/07/2015 | \n",
" Newmarket | \n",
" Miss Inga Sock | \n",
" Win | \n",
" 5.00 | \n",
" Lose | \n",
" True | \n",
"
\n",
" \n",
" 5 | \n",
" 5 | \n",
" Tipster A | \n",
" 25/07/2015 | \n",
" Ascot | \n",
" Peril | \n",
" Win | \n",
" 4.33 | \n",
" Win | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID Tipster Date Track Horse Bet Type Odds \\\n",
"UID \n",
"1 1 Tipster A 24/07/2015 Ascot Fredricka Win 8.00 \n",
"2 2 Tipster A 24/07/2015 Thirsk Spend A Penny Win 4.50 \n",
"3 3 Tipster A 24/07/2015 York Straightothepoint Win 7.00 \n",
"4 4 Tipster A 24/07/2015 Newmarket Miss Inga Sock Win 5.00 \n",
"5 5 Tipster A 25/07/2015 Ascot Peril Win 4.33 \n",
"\n",
" Result TipsterActive \n",
"UID \n",
"1 Lose True \n",
"2 Lose True \n",
"3 Lose True \n",
"4 Lose True \n",
"5 Win True "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Load the tips table, using the UID column as the row index.\n",
"# The file is decoded as ISO-8859-1 instead of the UTF-8 default.\n",
"df = pd.read_csv(\n",
"    './data/tips.csv',\n",
"    index_col='UID',\n",
"    encoding=\"ISO-8859-1\",\n",
")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Tipster', 'Date', 'Track', 'Horse', 'Bet Type', 'Result'], dtype='object')"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Names of the columns stored with dtype 'object' (the text columns).\n",
"cat_var = df.columns[(df.dtypes == 'object').values]\n",
"cat_var"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Tipster 31\n",
"Date 1055\n",
"Track 116\n",
"Horse 15791\n",
"Bet Type 3\n",
"Result 2\n",
"dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of distinct values in each categorical column.\n",
"df[cat_var].apply(lambda column: column.unique().size)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Integer-encode every categorical column of df in place.\n",
"# A separate fitted LabelEncoder is kept per column in `encoders`, so codes\n",
"# can be mapped back later with encoders[col].inverse_transform(...).\n",
"# (One shared encoder would be refit on each pass and would only remember\n",
"# the classes of the last column.)\n",
"encoders = {}\n",
"for var in cat_var:\n",
"    le = LabelEncoder()\n",
"    df[var] = le.fit_transform(df[var])\n",
"    encoders[var] = le"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" Tipster | \n",
" Date | \n",
" Track | \n",
" Horse | \n",
" Bet Type | \n",
" Odds | \n",
" Result | \n",
" TipsterActive | \n",
"
\n",
" \n",
" UID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 818 | \n",
" 2 | \n",
" 5158 | \n",
" 1 | \n",
" 8.00 | \n",
" 0 | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 0 | \n",
" 818 | \n",
" 96 | \n",
" 13108 | \n",
" 1 | \n",
" 4.50 | \n",
" 0 | \n",
" True | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 0 | \n",
" 818 | \n",
" 114 | \n",
" 13411 | \n",
" 1 | \n",
" 7.00 | \n",
" 0 | \n",
" True | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 0 | \n",
" 818 | \n",
" 74 | \n",
" 8976 | \n",
" 1 | \n",
" 5.00 | \n",
" 0 | \n",
" True | \n",
"
\n",
" \n",
" 5 | \n",
" 5 | \n",
" 0 | \n",
" 851 | \n",
" 2 | \n",
" 10554 | \n",
" 1 | \n",
" 4.33 | \n",
" 1 | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID Tipster Date Track Horse Bet Type Odds Result TipsterActive\n",
"UID \n",
"1 1 0 818 2 5158 1 8.00 0 True\n",
"2 2 0 818 96 13108 1 4.50 0 True\n",
"3 3 0 818 114 13411 1 7.00 0 True\n",
"4 4 0 818 74 8976 1 5.00 0 True\n",
"5 5 0 851 2 10554 1 4.33 1 True"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of df.\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# Feature matrix: five columns of df selected by name.\n",
"X = df.loc[:, ['Tipster', 'Track', 'Horse', 'Bet Type', 'Odds']]\n",
"# Target vector: the Result column as a NumPy array.\n",
"y = df['Result'].values"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# Hold out 20% of the rows for evaluation.\n",
"# random_state pins the shuffle so the split (and every score derived from\n",
"# it) is reproducible after a kernel restart; stratify=y keeps the class\n",
"# proportions of y the same in the train and test partitions.\n",
"X_train,X_test,y_train,y_test = train_test_split(\n",
"    X, y, test_size=0.20, random_state=42, stratify=y\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"def standard_pipe(algorism):\n",
"    \"\"\"Build a two-step pipeline around the given estimator.\n",
"\n",
"    Step 'scl' is a fresh StandardScaler; step 'est' is `algorism` itself.\n",
"    Returns the (unfitted) Pipeline.\n",
"    \"\"\"\n",
"    steps = [('scl', StandardScaler()), ('est', algorism)]\n",
"    return Pipeline(steps)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def fit(algorism):\n",
"    \"\"\"Wrap `algorism` with standard_pipe and train it.\n",
"\n",
"    Trains on the notebook-level X_train / y_train and returns the fitted\n",
"    pipeline (Pipeline.fit returns the pipeline itself).\n",
"    \"\"\"\n",
"    return standard_pipe(algorism).fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(30598, 5)\n",
"(30598,)\n"
]
}
],
"source": [
"# Sanity check: the feature and target training partitions have matching row counts.\n",
"print(X_train.shape, y_train.shape, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"# Train a logistic-regression model (default settings) inside the scaling pipeline.\n",
"pipe = fit(algorism=LogisticRegression())"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.80011765474867635"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Accuracy measured on the rows the model was trained on is optimistic, so\n",
"# also report the score on the held-out test partition.\n",
"print('test accuracy:', accuracy_score(y_test, pipe.predict(X_test)))\n",
"# Training accuracy remains the cell's displayed result.\n",
"accuracy_score(y_train, pipe.predict(X_train))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}