{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDTipsterDateTrackHorseBet TypeOddsResultTipsterActive
UID
11Tipster A24/07/2015AscotFredrickaWin8.00LoseTrue
22Tipster A24/07/2015ThirskSpend A PennyWin4.50LoseTrue
33Tipster A24/07/2015YorkStraightothepointWin7.00LoseTrue
44Tipster A24/07/2015NewmarketMiss Inga SockWin5.00LoseTrue
55Tipster A25/07/2015AscotPerilWin4.33WinTrue
\n", "
" ], "text/plain": [ " ID Tipster Date Track Horse Bet Type Odds \\\n", "UID \n", "1 1 Tipster A 24/07/2015 Ascot Fredricka Win 8.00 \n", "2 2 Tipster A 24/07/2015 Thirsk Spend A Penny Win 4.50 \n", "3 3 Tipster A 24/07/2015 York Straightothepoint Win 7.00 \n", "4 4 Tipster A 24/07/2015 Newmarket Miss Inga Sock Win 5.00 \n", "5 5 Tipster A 25/07/2015 Ascot Peril Win 4.33 \n", "\n", " Result TipsterActive \n", "UID \n", "1 Lose True \n", "2 Lose True \n", "3 Lose True \n", "4 Lose True \n", "5 Win True " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn.preprocessing import LabelEncoder\n", "\n", "# Read the tips CSV as ISO-8859-1 text, using the UID column as the index.\n", "df = pd.read_csv('./data/tips.csv', index_col='UID', encoding='ISO-8859-1')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Tipster', 'Date', 'Track', 'Horse', 'Bet Type', 'Result'], dtype='object')" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Columns with object dtype are the ones label-encoded further down.\n", "cat_var = df.select_dtypes(include='object').columns\n", "cat_var" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Tipster 31\n", "Date 1055\n", "Track 116\n", "Horse 15791\n", "Bet Type 3\n", "Result 2\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of distinct values per object column (NaN counted as its own value).\n", "df[cat_var].nunique(dropna=False)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Replace each object column in df with integer codes, in place.\n", "# A separate fitted encoder is kept per column so the codes can be mapped back\n", "# to the original labels via encoders[col].inverse_transform(codes).\n", "encoders = {}\n", "for var in cat_var:\n", "    le = LabelEncoder()\n", "    df[var] = le.fit_transform(df[var])\n", "    encoders[var] = le" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDTipsterDateTrackHorseBet TypeOddsResultTipsterActive
UID
1108182515818.000True
220818961310814.500True
3308181141341117.000True
44081874897615.000True
55085121055414.331True
\n", "
" ], "text/plain": [ " ID Tipster Date Track Horse Bet Type Odds Result TipsterActive\n", "UID \n", "1 1 0 818 2 5158 1 8.00 0 True\n", "2 2 0 818 96 13108 1 4.50 0 True\n", "3 3 0 818 114 13411 1 7.00 0 True\n", "4 4 0 818 74 8976 1 5.00 0 True\n", "5 5 0 851 2 10554 1 4.33 1 True" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "# Feature columns and the target column taken from the encoded frame.\n", "X = df[['Tipster', 'Track', 'Horse', 'Bet Type', 'Odds']]\n", "y = df['Result'].values" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.preprocessing import StandardScaler\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "# A fixed random_state makes the 80/20 split identical on every Restart & Run All.\n", "X_train, X_test, y_train, y_test = train_test_split(\n", "    X, y, test_size=0.20, random_state=42\n", ")" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "def standard_pipe(algorism):\n", "    \"\"\"Return an unfitted Pipeline: StandardScaler ('scl') followed by `algorism` ('est').\"\"\"\n", "    return Pipeline([\n", "        ('scl', StandardScaler()),\n", "        ('est', algorism)\n", "    ])" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def fit(algorism, X=None, y=None):\n", "    \"\"\"Build a scaled pipeline around `algorism`, fit it, and return it.\n", "\n", "    X and y default to the notebook-level X_train and y_train, so existing\n", "    fit(estimator) calls behave exactly as before.\n", "    \"\"\"\n", "    X = X_train if X is None else X\n", "    y = y_train if y is None else y\n", "    pipe = standard_pipe(algorism)\n", "    pipe.fit(X, y)\n", "    return pipe" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(30598, 5)\n", "(30598,)\n" ] } ], "source": [ "print(X_train.shape)\n", "print(y_train.shape)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "pipe = fit(LogisticRegression())" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": {
"text/plain": [ "0.80011765474867635" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Accuracy on the data the pipeline was fitted on (not a generalization estimate).\n", "accuracy_score(y_train, pipe.predict(X_train))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Accuracy on the held-out split, which the pipeline never saw during fitting.\n", "accuracy_score(y_test, pipe.predict(X_test))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.2" } }, "nbformat": 4, "nbformat_minor": 2 }