{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] } ], "source": [ "%pylab inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##SQL Exercise" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearidteamidlgidplayeridsalary
0 1985 ATL NL barkele01 870000
1 1985 ATL NL bedrost01 550000
2 1985 ATL NL benedbr01 545000
3 1985 ATL NL campri01 633333
4 1985 ATL NL ceronri01 625000
\n", "
" ], "text/plain": [ " yearid teamid lgid playerid salary\n", "0 1985 ATL NL barkele01 870000\n", "1 1985 ATL NL bedrost01 550000\n", "2 1985 ATL NL benedbr01 545000\n", "3 1985 ATL NL campri01 633333\n", "4 1985 ATL NL ceronri01 625000" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "from sqlalchemy import create_engine\n", "cnx = ################################\n", "query = \"\"\"SELECT * FROM salaries LIMIT 10\"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def show_columns(table, con):\n", " from pandas import read_sql_table\n", " return read_sql_table(table, con).columns" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index([u'yearid', u'teamid', u'lgid', u'playerid', u'salary'], dtype='object')" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "show_columns('salaries',cnx)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###1. Show all playerids and salaries with a salary in the year 1985 above 500k." ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playeridsalary
0 barkele01 870000
1 bedrost01 550000
2 benedbr01 545000
3 campri01 633333
4 ceronri01 625000
\n", "
" ], "text/plain": [ " playerid salary\n", "0 barkele01 870000\n", "1 bedrost01 550000\n", "2 benedbr01 545000\n", "3 campri01 633333\n", "4 ceronri01 625000" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select playerid, salary \n", " from salaries \n", " where yearid = 1985 and salary > 500000 \"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###2. Show the team for each year that had a rank of 1" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearidteamid
0 1871 PH1
1 1872 BS1
2 1873 BS1
3 1874 BS1
4 1875 BS1
\n", "
" ], "text/plain": [ " yearid teamid\n", "0 1871 PH1\n", "1 1872 BS1\n", "2 1873 BS1\n", "3 1874 BS1\n", "4 1875 BS1" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select yearid, teamid \n", " from teams \n", " where rank = 1 \n", " order by yearid \"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3. How many schools are in schoolstate of CT?" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
count
0 15
\n", "
" ], "text/plain": [ " count\n", "0 15" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select count(schoolid) \n", " from schools \n", " where schoolstate = 'CT' \"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###4. How many schools are there in each state?" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
schoolstatecount
0 PA 72
1 AZ 14
2 FL 58
3 LA 17
4 NM 7
\n", "
" ], "text/plain": [ " schoolstate count\n", "0 PA 72\n", "1 AZ 14\n", "2 FL 58\n", "3 LA 17\n", "4 NM 7" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select schoolstate, count(schoolid) \n", " from schools \n", " group by schoolstate\"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###5. What was the total spend on salaries by each team, each year?" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearidteamidsum
0 1985 LAN 10967917
1 1997 SLN 45456667
2 2008 ATL 102365683
3 2014 DET 152855500
4 2007 CHN 99670332
\n", "
" ], "text/plain": [ " yearid teamid sum\n", "0 1985 LAN 10967917\n", "1 1997 SLN 45456667\n", "2 2008 ATL 102365683\n", "3 2014 DET 152855500\n", "4 2007 CHN 99670332" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select yearid, teamid, sum(salary)\n", " from salaries \n", " group by yearid,teamid\"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###6. Find all of the salaries of shortstops (fieldings, pos) for the year 2012." ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
salary
0 1900000
1 1900000
2 1900000
3 1900000
4 1900000
\n", "
" ], "text/plain": [ " salary\n", "0 1900000\n", "1 1900000\n", "2 1900000\n", "3 1900000\n", "4 1900000" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select salaries.salary\n", " from salaries inner join fielding on salaries.playerid = fielding.playerid\n", " where salaries.yearid = 2012 and fielding.pos = 'SS'\n", " \"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7. What is the first and last year played for each player?" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playeridfirst_yearlast_year
0 putkolu01 2014 2014
1 bookech01 2006 2006
2 wisede01 2000 2013
3 contrjo01 2003 2012
4 myersro02 1997 1998
\n", "
" ], "text/plain": [ " playerid first_year last_year\n", "0 putkolu01 2014 2014\n", "1 bookech01 2006 2006\n", "2 wisede01 2000 2013\n", "3 contrjo01 2003 2012\n", "4 myersro02 1997 1998" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select playerid, min(yearid) as first_year, max(yearid) as last_year \n", " from salaries\n", " group by playerid\n", " \"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###8. Who has played the most all star games?" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playeridallstar
0 aaronha01 25
\n", "
" ], "text/plain": [ " playerid allstar\n", "0 aaronha01 25" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select playerid, count(distinct gameid) allstar from allstarfull\n", " group by playerid\n", " order by allstar desc\n", " limit 1\n", " \"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###9. Which school has generated the most distinct players?" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
schoolidcount
0 texas 107
1 usc 105
2 arizonast 101
3 stanford 86
4 michigan 76
\n", "
" ], "text/plain": [ " schoolid count\n", "0 texas 107\n", "1 usc 105\n", "2 arizonast 101\n", "3 stanford 86\n", "4 michigan 76" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select schoolid, count(distinct playerid) \n", " from collegeplaying \n", " group by schoolid \n", " order by count(distinct playerid) desc\n", " \"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###10. Which school has generated the most expensive players? (expensive defined by their first year's salary).\n" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
schoolidsalary
0 msmerid 25000000
1 swmost 25000000
2 arkansas 25000000
3 swmost 25000000
4 msmerid 25000000
5 swmost 25000000
6 longbeach 23428571
7 longbeach 23428571
8 longbeach 23428571
9 gatech 23125000
\n", "
" ], "text/plain": [ " schoolid salary\n", "0 msmerid 25000000\n", "1 swmost 25000000\n", "2 arkansas 25000000\n", "3 swmost 25000000\n", "4 msmerid 25000000\n", "5 swmost 25000000\n", "6 longbeach 23428571\n", "7 longbeach 23428571\n", "8 longbeach 23428571\n", "9 gatech 23125000" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select a.schoolid, b.salary \n", " from collegeplaying a\n", " inner join\n", " (select playerid, salary, min(yearid)\n", " from salaries\n", " group by playerid, salary) b \n", " on a.playerid = b.playerid \n", " order by b.salary desc\n", " \"\"\"\n", "pd.read_sql_query(query, cnx).head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 11. Show the 5 most expensive salaries for each team in the year 2014." ] }, { "cell_type": "code", "execution_count": 102, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
teamidsalary
0 BAL 13000000
1 BAL 15000000
2 CHA 15000000
3 CLE 13500000
4 CLE 15000000
5 DET 22000000
6 KCA 13500000
7 LAA 23000000
8 LAN 21000000
9 LAN 26000000
10 LAN 21000000
11 MIL 16000000
12 MIN 23000000
13 NYM 9000000
14 NYM 3625000
15 NYM 13000000
16 NYM 5700000
17 NYM 5000000
18 NYM 3700000
19 NYN 20000000
20 PHI 22500000
21 PHI 25000000
22 PHI 25000000
23 PIT 13000000
24 SDN 10525000
25 SEA 24000000
26 SEA 22857000
27 SFG 11000000
28 SFG 17000000
29 SFG 10250000
30 SFG 14500000
31 SFG 16000000
32 SFG 12500000
33 SFN 20000000
34 SLN 17000000
35 SLN 19500000
36 TBA 9000000
37 TBA 14000000
38 TEX 17000000
39 TEX 24000000
40 TOR 16000000
41 WAS 20000000
42 WAS 14000000
\n", "
" ], "text/plain": [ " teamid salary\n", "0 BAL 13000000\n", "1 BAL 15000000\n", "2 CHA 15000000\n", "3 CLE 13500000\n", "4 CLE 15000000\n", "5 DET 22000000\n", "6 KCA 13500000\n", "7 LAA 23000000\n", "8 LAN 21000000\n", "9 LAN 26000000\n", "10 LAN 21000000\n", "11 MIL 16000000\n", "12 MIN 23000000\n", "13 NYM 9000000\n", "14 NYM 3625000\n", "15 NYM 13000000\n", "16 NYM 5700000\n", "17 NYM 5000000\n", "18 NYM 3700000\n", "19 NYN 20000000\n", "20 PHI 22500000\n", "21 PHI 25000000\n", "22 PHI 25000000\n", "23 PIT 13000000\n", "24 SDN 10525000\n", "25 SEA 24000000\n", "26 SEA 22857000\n", "27 SFG 11000000\n", "28 SFG 17000000\n", "29 SFG 10250000\n", "30 SFG 14500000\n", "31 SFG 16000000\n", "32 SFG 12500000\n", "33 SFN 20000000\n", "34 SLN 17000000\n", "35 SLN 19500000\n", "36 TBA 9000000\n", "37 TBA 14000000\n", "38 TEX 17000000\n", "39 TEX 24000000\n", "40 TOR 16000000\n", "41 WAS 20000000\n", "42 WAS 14000000" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"select teamid, salary\n", " from salaries\n", " where(\n", " select count(*) from salaries as s\n", " where s.teamid = salaries.teamid and salaries.salary < s.salary) <=5 and yearid=2014\n", "\n", " \"\"\"\n", "pd.read_sql_query(query, cnx)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 12. Partition the average salaries by team and year, against year. Find players that were paid more than 1 standard deviation above the average salary for that team and year. Show a count by playerid." ] }, { "cell_type": "code", "execution_count": 127, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
teamidyearidcount
0 SLN 2009 3
1 KCA 1995 5
2 ANA 1999 6
3 SEA 2007 4
4 PHI 1991 6
\n", "
" ], "text/plain": [ " teamid yearid count\n", "0 SLN 2009 3\n", "1 KCA 1995 5\n", "2 ANA 1999 6\n", "3 SEA 2007 4\n", "4 PHI 1991 6" ] }, "execution_count": 127, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"\n", " select b.teamid, b.yearid, count(playerid) from salaries inner join\n", " (select * from\n", " (select teamid, yearid,\n", " (AVG(salary) over (PARTITION BY yearid, teamid) + STDDEV(salary) over (PARTITION BY yearid, teamid)) threshold\n", " from salaries) a \n", " group by a.teamid, a.yearid, a.threshold) b \n", " on salaries.teamid = b.teamid and salaries.yearid = b.yearid \n", " where salaries.salary>b.threshold\n", " group by b.teamid, b.yearid\n", " \"\"\"\n", "pd.read_sql_query(query, cnx).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###13 . Calculate the win percentage. convert w and g into numerics (floats) to do so. (w::numeric, for example)" ] }, { "cell_type": "code", "execution_count": 134, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
teamidwin_percentage
0 NYA 0.566587
1 MLA 0.345324
2 SL1 0.210526
3 BFN 0.478659
4 PTP 0.468750
5 HOU 0.486883
6 BLU 0.547170
7 CAL 0.482404
8 COL 0.468589
9 TBA 0.462209
10 KCN 0.238095
11 NY3 0.368421
12 CL6 0.478469
13 CHA 0.501799
14 MIL 0.472575
15 WS6 0.178571
16 KCA 0.482297
17 HR1 0.503597
18 KCF 0.482085
19 BR3 0.523627
20 PHN 0.233333
21 WAS 0.430915
22 MLU 0.666667
23 CHN 0.506994
24 LAA 0.535509
25 BRF 0.474194
26 CHF 0.554487
27 LS2 0.465531
28 IN1 0.380952
29 CN2 0.573668
.........
119 SEA 0.467761
120 MIA 0.427984
121 PRO 0.604138
122 BRP 0.571429
123 LS3 0.374107
124 TRO 0.518519
125 IN3 0.366834
126 SL2 0.557143
127 WOR 0.357143
128 RC2 0.473684
129 FW1 0.368421
130 ELI 0.086957
131 PHA 0.473152
132 CL4 0.481409
133 BR4 0.260000
134 SE1 0.392638
135 PHI 0.470214
136 LS1 0.500000
137 WS7 0.190476
138 CIN 0.501420
139 MON 0.483164
140 KC1 0.402427
141 DTN 0.484091
142 WS2 0.417372
143 OAK 0.522047
144 NH1 0.148936
145 SL5 0.333333
146 WS8 0.317121
147 BLF 0.417197
148 SDN 0.463701
\n", "

149 rows × 2 columns

\n", "
" ], "text/plain": [ " teamid win_percentage\n", "0 NYA 0.566587\n", "1 MLA 0.345324\n", "2 SL1 0.210526\n", "3 BFN 0.478659\n", "4 PTP 0.468750\n", "5 HOU 0.486883\n", "6 BLU 0.547170\n", "7 CAL 0.482404\n", "8 COL 0.468589\n", "9 TBA 0.462209\n", "10 KCN 0.238095\n", "11 NY3 0.368421\n", "12 CL6 0.478469\n", "13 CHA 0.501799\n", "14 MIL 0.472575\n", "15 WS6 0.178571\n", "16 KCA 0.482297\n", "17 HR1 0.503597\n", "18 KCF 0.482085\n", "19 BR3 0.523627\n", "20 PHN 0.233333\n", "21 WAS 0.430915\n", "22 MLU 0.666667\n", "23 CHN 0.506994\n", "24 LAA 0.535509\n", "25 BRF 0.474194\n", "26 CHF 0.554487\n", "27 LS2 0.465531\n", "28 IN1 0.380952\n", "29 CN2 0.573668\n", ".. ... ...\n", "119 SEA 0.467761\n", "120 MIA 0.427984\n", "121 PRO 0.604138\n", "122 BRP 0.571429\n", "123 LS3 0.374107\n", "124 TRO 0.518519\n", "125 IN3 0.366834\n", "126 SL2 0.557143\n", "127 WOR 0.357143\n", "128 RC2 0.473684\n", "129 FW1 0.368421\n", "130 ELI 0.086957\n", "131 PHA 0.473152\n", "132 CL4 0.481409\n", "133 BR4 0.260000\n", "134 SE1 0.392638\n", "135 PHI 0.470214\n", "136 LS1 0.500000\n", "137 WS7 0.190476\n", "138 CIN 0.501420\n", "139 MON 0.483164\n", "140 KC1 0.402427\n", "141 DTN 0.484091\n", "142 WS2 0.417372\n", "143 OAK 0.522047\n", "144 NH1 0.148936\n", "145 SL5 0.333333\n", "146 WS8 0.317121\n", "147 BLF 0.417197\n", "148 SDN 0.463701\n", "\n", "[149 rows x 2 columns]" ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"\n", " select teamid, sum(cast(w AS numeric))/sum(cast(g AS numeric)) win_percentage from teams\n", " group by teamid\n", " \"\"\"\n", "pd.read_sql_query(query, cnx)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 14 .rank() the total spend by team each year against their actual rank that year. Is there a correlation of spend to performance?" ] }, { "cell_type": "code", "execution_count": 170, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
teamidyearidspend_rankrank
0 ATL 1985 1 5
1 CAL 1985 2 2
2 NYA 1985 3 2
3 CHN 1985 4 4
4 SLN 1985 5 1
5 BAL 1985 6 4
6 ML4 1985 7 6
7 SDN 1985 8 3
8 LAN 1985 9 1
9 BOS 1985 10 5
10 NYN 1985 11 2
11 DET 1985 12 3
12 PHI 1985 13 5
13 HOU 1985 14 3
14 CHA 1985 15 3
15 MON 1985 16 3
16 KCA 1985 17 1
17 PIT 1985 18 6
18 OAK 1985 19 4
19 TOR 1985 20 1
20 CIN 1985 21 2
21 SFN 1985 22 6
22 TEX 1985 23 7
23 CLE 1985 24 7
24 MIN 1985 25 4
25 SEA 1985 26 6
26 NYA 1986 1 2
27 CHN 1986 2 5
28 ATL 1986 3 6
29 NYN 1986 4 1
...............
828 LAN 2014 1 1
829 NYA 2014 2 2
830 PHI 2014 3 5
831 DET 2014 4 1
832 BOS 2014 6 5
833 WAS 2014 7 1
834 LAA 2014 8 1
835 SLN 2014 9 1
836 TEX 2014 10 5
837 TOR 2014 11 3
838 CIN 2014 12 4
839 BAL 2014 13 1
840 MIL 2014 14 3
841 ARI 2014 15 5
842 ATL 2014 16 2
843 COL 2014 17 4
844 SEA 2014 18 3
845 MIN 2014 19 5
846 CLE 2014 20 3
847 CHA 2014 21 4
848 PIT 2014 22 2
849 SDN 2014 23 3
850 KCA 2014 24 2
851 TBA 2014 25 4
852 OAK 2014 26 2
853 CHN 2014 27 5
854 MIA 2014 29 4
855 HOU 2014 30 4
856 NYN 2014 31 3
857 SFN 2014 32 2
\n", "

858 rows × 4 columns

\n", "
" ], "text/plain": [ " teamid yearid spend_rank rank\n", "0 ATL 1985 1 5\n", "1 CAL 1985 2 2\n", "2 NYA 1985 3 2\n", "3 CHN 1985 4 4\n", "4 SLN 1985 5 1\n", "5 BAL 1985 6 4\n", "6 ML4 1985 7 6\n", "7 SDN 1985 8 3\n", "8 LAN 1985 9 1\n", "9 BOS 1985 10 5\n", "10 NYN 1985 11 2\n", "11 DET 1985 12 3\n", "12 PHI 1985 13 5\n", "13 HOU 1985 14 3\n", "14 CHA 1985 15 3\n", "15 MON 1985 16 3\n", "16 KCA 1985 17 1\n", "17 PIT 1985 18 6\n", "18 OAK 1985 19 4\n", "19 TOR 1985 20 1\n", "20 CIN 1985 21 2\n", "21 SFN 1985 22 6\n", "22 TEX 1985 23 7\n", "23 CLE 1985 24 7\n", "24 MIN 1985 25 4\n", "25 SEA 1985 26 6\n", "26 NYA 1986 1 2\n", "27 CHN 1986 2 5\n", "28 ATL 1986 3 6\n", "29 NYN 1986 4 1\n", ".. ... ... ... ...\n", "828 LAN 2014 1 1\n", "829 NYA 2014 2 2\n", "830 PHI 2014 3 5\n", "831 DET 2014 4 1\n", "832 BOS 2014 6 5\n", "833 WAS 2014 7 1\n", "834 LAA 2014 8 1\n", "835 SLN 2014 9 1\n", "836 TEX 2014 10 5\n", "837 TOR 2014 11 3\n", "838 CIN 2014 12 4\n", "839 BAL 2014 13 1\n", "840 MIL 2014 14 3\n", "841 ARI 2014 15 5\n", "842 ATL 2014 16 2\n", "843 COL 2014 17 4\n", "844 SEA 2014 18 3\n", "845 MIN 2014 19 5\n", "846 CLE 2014 20 3\n", "847 CHA 2014 21 4\n", "848 PIT 2014 22 2\n", "849 SDN 2014 23 3\n", "850 KCA 2014 24 2\n", "851 TBA 2014 25 4\n", "852 OAK 2014 26 2\n", "853 CHN 2014 27 5\n", "854 MIA 2014 29 4\n", "855 HOU 2014 30 4\n", "856 NYN 2014 31 3\n", "857 SFN 2014 32 2\n", "\n", "[858 rows x 4 columns]" ] }, "execution_count": 170, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\" select a.teamid, a.yearid, a.spend_rank, teams.rank\n", " from teams inner join \n", " \n", " (select teamid, yearid, sum(salary) spend,\n", " rank() over (PARTITION BY yearid ORDER BY sum(salary) DESC) spend_rank\n", " from salaries\n", " group by teamid, yearid) a\n", " on a.teamid=teams.teamid and a.yearid=teams.yearid\n", " \"\"\"\n", "pd.read_sql_query(query, cnx)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }