{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "markdown",
"source": [
"### Source of Data\n",
"Data from Jeff Sackmann (https://www.jeffsackmann.com/), loaded from his `tennis_wta` GitHub repository."
],
"metadata": {
"id": "Gxa3PZKrpXdq"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "9IOGiq0zVEbE"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hiBTJctzVEbM",
"outputId": "83d8325f-50c5-4d22-9889-e4f10be0b504"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"list"
]
},
"metadata": {},
"execution_count": 2
}
],
"source": [
"# Load one WTA match file per season, 2000 through 2019 (the range end is exclusive)\n",
"url = 'https://raw.githubusercontent.com/JeffSackmann/tennis_wta/master/'\n",
"df_list = []\n",
"for year in range(2000, 2020):\n",
"    df_list.append(pd.read_csv(f'{url}wta_matches_{year}.csv'))\n",
"\n",
"type(df_list)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "gLePVOrOVEbS",
"outputId": "39025098-e3cc-4737-e32c-a7fac88792c8"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" week ranking player_id ranking_points tours\n",
"0 20000101 1 200001 6074.0 NaN\n",
"1 20000103 1 200001 6074.0 NaN\n",
"2 20000110 1 200001 6074.0 NaN\n",
"3 20000117 1 200001 6003.0 NaN\n",
"4 20000124 1 200001 6003.0 NaN\n",
"... ... ... ... ... ...\n",
"1207230 20141110 1242 223123 3.0 NaN\n",
"1207231 20191209 1047 223179 NaN NaN\n",
"1207232 20191216 996 223179 NaN NaN\n",
"1207233 20191223 948 223179 NaN NaN\n",
"1207234 20191230 949 223179 NaN NaN\n",
"\n",
"[1207235 rows x 5 columns]"
],
"text/html": [
"\n",
"
\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" week | \n",
" ranking | \n",
" player_id | \n",
" ranking_points | \n",
" tours | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20000101 | \n",
" 1 | \n",
" 200001 | \n",
" 6074.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" 20000103 | \n",
" 1 | \n",
" 200001 | \n",
" 6074.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" 20000110 | \n",
" 1 | \n",
" 200001 | \n",
" 6074.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
" 20000117 | \n",
" 1 | \n",
" 200001 | \n",
" 6003.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 4 | \n",
" 20000124 | \n",
" 1 | \n",
" 200001 | \n",
" 6003.0 | \n",
" NaN | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 1207230 | \n",
" 20141110 | \n",
" 1242 | \n",
" 223123 | \n",
" 3.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 1207231 | \n",
" 20191209 | \n",
" 1047 | \n",
" 223179 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 1207232 | \n",
" 20191216 | \n",
" 996 | \n",
" 223179 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 1207233 | \n",
" 20191223 | \n",
" 948 | \n",
" 223179 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 1207234 | \n",
" 20191230 | \n",
" 949 | \n",
" 223179 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
1207235 rows × 5 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 3
}
],
"source": [
"# Weekly WTA rankings for 2000-2019, read from one file per decade\n",
"\n",
"ranking_columns = ['week', 'ranking', 'player_id', 'ranking_points', 'tours']\n",
"\n",
"rankings_00s = pd.read_csv(url + 'wta_rankings_00s.csv')\n",
"rankings_00s.columns = ranking_columns\n",
"\n",
"rankings_10s = pd.read_csv(url + 'wta_rankings_10s.csv')\n",
"rankings_10s.columns = ranking_columns\n",
"\n",
"# Stack the two decades and renumber the rows 0..n-1\n",
"rankings = pd.concat([rankings_00s, rankings_10s], ignore_index=True)\n",
"\n",
"rankings"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SUoFPWMuVEbS",
"outputId": "445032f9-b355-4e67-8b04-e701870df050"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"RangeIndex: 1207235 entries, 0 to 1207234\n",
"Data columns (total 5 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 week 1207235 non-null int64 \n",
" 1 ranking 1207235 non-null int64 \n",
" 2 player_id 1207235 non-null int64 \n",
" 3 ranking_points 1207231 non-null float64\n",
" 4 tours 618226 non-null float64\n",
"dtypes: float64(2), int64(3)\n",
"memory usage: 46.1 MB\n"
]
}
],
"source": [
"rankings.info()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hO8FCwX9VEbT",
"outputId": "7caf0892-1d24-4b97-c631-b86183d5f592"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"metadata": {},
"execution_count": 5
}
],
"source": [
"type(rankings['week'])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "bR310QpLVEbT"
},
"outputs": [],
"source": [
"# The week column is read as integers such as 20000101 (YYYYMMDD); parse them into real dates\n",
"week_as_date = pd.to_datetime(rankings['week'], format='%Y%m%d')\n",
"rankings['week'] = week_as_date"
]
},
{
"cell_type": "code",
"source": [
"# Player metadata keyed by player_id; reuse the shared base `url` defined with the match files\n",
"# so every load in this notebook points at the same repository.\n",
"player_df = pd.read_csv(url + 'wta_players.csv')\n",
"player_df.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "f-ThpbV6thFN",
"outputId": "f55a4d28-48e6-4b9f-bfde-899f8335b8c0"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"RangeIndex: 60027 entries, 0 to 60026\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 player_id 60027 non-null int64 \n",
" 1 name_first 56704 non-null object \n",
" 2 name_last 60027 non-null object \n",
" 3 hand 60024 non-null object \n",
" 4 dob 25213 non-null float64\n",
" 5 ioc 59034 non-null object \n",
" 6 height 1338 non-null float64\n",
" 7 wikidata_id 3707 non-null object \n",
"dtypes: float64(2), int64(1), object(5)\n",
"memory usage: 3.7+ MB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Attach player metadata to every ranking row; the left join keeps all ranking rows\n",
"# even when a player_id has no match in player_df.\n",
"joined_df = rankings.merge(player_df, how='left', on='player_id')"
],
"metadata": {
"id": "3CN6sUHXb_qI"
},
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"source": [
"joined_df.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TsuDMYyjb_hq",
"outputId": "2bb6998e-a470-4603-804b-9a31cc2be261"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"Int64Index: 1207235 entries, 0 to 1207234\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 week 1207235 non-null datetime64[ns]\n",
" 1 ranking 1207235 non-null int64 \n",
" 2 player_id 1207235 non-null int64 \n",
" 3 ranking_points 1207231 non-null float64 \n",
" 4 tours 618226 non-null float64 \n",
" 5 name_first 1207235 non-null object \n",
" 6 name_last 1207235 non-null object \n",
" 7 hand 1207235 non-null object \n",
" 8 dob 1207235 non-null float64 \n",
" 9 ioc 1207235 non-null object \n",
" 10 height 444146 non-null float64 \n",
" 11 wikidata_id 706520 non-null object \n",
"dtypes: datetime64[ns](1), float64(4), int64(2), object(5)\n",
"memory usage: 119.7+ MB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"joined_df.head(100)"
],
"metadata": {
"id": "PlLyoO7xb_e1",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"outputId": "cce28579-db64-407b-8bcb-c7391ac90d32"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" week ranking player_id ranking_points tours name_first name_last \\\n",
"0 2000-01-01 1 200001 6074.0 NaN Martina Hingis \n",
"1 2000-01-03 1 200001 6074.0 NaN Martina Hingis \n",
"2 2000-01-10 1 200001 6074.0 NaN Martina Hingis \n",
"3 2000-01-17 1 200001 6003.0 NaN Martina Hingis \n",
"4 2000-01-24 1 200001 6003.0 NaN Martina Hingis \n",
".. ... ... ... ... ... ... ... \n",
"95 2001-10-22 2 200001 4842.0 NaN Martina Hingis \n",
"96 2001-10-29 3 200001 4586.0 NaN Martina Hingis \n",
"97 2001-11-05 4 200001 3944.0 NaN Martina Hingis \n",
"98 2001-11-12 4 200001 3944.0 NaN Martina Hingis \n",
"99 2001-11-19 4 200001 3944.0 NaN Martina Hingis \n",
"\n",
" hand dob ioc height wikidata_id \n",
"0 R 19800930.0 SUI 170.0 Q134720 \n",
"1 R 19800930.0 SUI 170.0 Q134720 \n",
"2 R 19800930.0 SUI 170.0 Q134720 \n",
"3 R 19800930.0 SUI 170.0 Q134720 \n",
"4 R 19800930.0 SUI 170.0 Q134720 \n",
".. ... ... ... ... ... \n",
"95 R 19800930.0 SUI 170.0 Q134720 \n",
"96 R 19800930.0 SUI 170.0 Q134720 \n",
"97 R 19800930.0 SUI 170.0 Q134720 \n",
"98 R 19800930.0 SUI 170.0 Q134720 \n",
"99 R 19800930.0 SUI 170.0 Q134720 \n",
"\n",
"[100 rows x 12 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" week | \n",
" ranking | \n",
" player_id | \n",
" ranking_points | \n",
" tours | \n",
" name_first | \n",
" name_last | \n",
" hand | \n",
" dob | \n",
" ioc | \n",
" height | \n",
" wikidata_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2000-01-01 | \n",
" 1 | \n",
" 200001 | \n",
" 6074.0 | \n",
" NaN | \n",
" Martina | \n",
" Hingis | \n",
" R | \n",
" 19800930.0 | \n",
" SUI | \n",
" 170.0 | \n",
" Q134720 | \n",
"
\n",
" \n",
" 1 | \n",
" 2000-01-03 | \n",
" 1 | \n",
" 200001 | \n",
" 6074.0 | \n",
" NaN | \n",
" Martina | \n",
" Hingis | \n",
" R | \n",
" 19800930.0 | \n",
" SUI | \n",
" 170.0 | \n",
" Q134720 | \n",
"
\n",
" \n",
" 2 | \n",
" 2000-01-10 | \n",
" 1 | \n",
" 200001 | \n",
" 6074.0 | \n",
" NaN | \n",
" Martina | \n",
" Hingis | \n",
" R | \n",
" 19800930.0 | \n",
" SUI | \n",
" 170.0 | \n",
" Q134720 | \n",
"
\n",
" \n",
" 3 | \n",
" 2000-01-17 | \n",
" 1 | \n",
" 200001 | \n",
" 6003.0 | \n",
" NaN | \n",
" Martina | \n",
" Hingis | \n",
" R | \n",
" 19800930.0 | \n",
" SUI | \n",
" 170.0 | \n",
" Q134720 | \n",
"
\n",
" \n",
" 4 | \n",
" 2000-01-24 | \n",
" 1 | \n",
" 200001 | \n",
" 6003.0 | \n",
" NaN | \n",
" Martina | \n",
" Hingis | \n",
" R | \n",
" 19800930.0 | \n",
" SUI | \n",
" 170.0 | \n",
" Q134720 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 95 | \n",
" 2001-10-22 | \n",
" 2 | \n",
" 200001 | \n",
" 4842.0 | \n",
" NaN | \n",
" Martina | \n",
" Hingis | \n",
" R | \n",
" 19800930.0 | \n",
" SUI | \n",
" 170.0 | \n",
" Q134720 | \n",
"
\n",
" \n",
" 96 | \n",
" 2001-10-29 | \n",
" 3 | \n",
" 200001 | \n",
" 4586.0 | \n",
" NaN | \n",
" Martina | \n",
" Hingis | \n",
" R | \n",
" 19800930.0 | \n",
" SUI | \n",
" 170.0 | \n",
" Q134720 | \n",
"
\n",
" \n",
" 97 | \n",
" 2001-11-05 | \n",
" 4 | \n",
" 200001 | \n",
" 3944.0 | \n",
" NaN | \n",
" Martina | \n",
" Hingis | \n",
" R | \n",
" 19800930.0 | \n",
" SUI | \n",
" 170.0 | \n",
" Q134720 | \n",
"
\n",
" \n",
" 98 | \n",
" 2001-11-12 | \n",
" 4 | \n",
" 200001 | \n",
" 3944.0 | \n",
" NaN | \n",
" Martina | \n",
" Hingis | \n",
" R | \n",
" 19800930.0 | \n",
" SUI | \n",
" 170.0 | \n",
" Q134720 | \n",
"
\n",
" \n",
" 99 | \n",
" 2001-11-19 | \n",
" 4 | \n",
" 200001 | \n",
" 3944.0 | \n",
" NaN | \n",
" Martina | \n",
" Hingis | \n",
" R | \n",
" 19800930.0 | \n",
" SUI | \n",
" 170.0 | \n",
" Q134720 | \n",
"
\n",
" \n",
"
\n",
"
100 rows × 12 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"# Narrow the merged frame to the ranking, player id and name columns\n",
"keep_column = ['ranking', 'player_id', 'name_first', 'name_last']\n",
"df2 = joined_df.loc[:, keep_column]\n",
"df2\n"
],
"metadata": {
"id": "wdfMZRJwvkOD",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"outputId": "0afcd112-2b9d-44a7-cc74-a46948f32245"
},
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" ranking player_id name_first name_last\n",
"0 1 200001 Martina Hingis\n",
"1 1 200001 Martina Hingis\n",
"2 1 200001 Martina Hingis\n",
"3 1 200001 Martina Hingis\n",
"4 1 200001 Martina Hingis\n",
"... ... ... ... ...\n",
"1207230 1242 223123 Jia Xiang Lu\n",
"1207231 1047 223179 Marion Viertler\n",
"1207232 996 223179 Marion Viertler\n",
"1207233 948 223179 Marion Viertler\n",
"1207234 949 223179 Marion Viertler\n",
"\n",
"[1207235 rows x 4 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ranking | \n",
" player_id | \n",
" name_first | \n",
" name_last | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 200001 | \n",
" Martina | \n",
" Hingis | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 200001 | \n",
" Martina | \n",
" Hingis | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 200001 | \n",
" Martina | \n",
" Hingis | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 200001 | \n",
" Martina | \n",
" Hingis | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 200001 | \n",
" Martina | \n",
" Hingis | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 1207230 | \n",
" 1242 | \n",
" 223123 | \n",
" Jia Xiang | \n",
" Lu | \n",
"
\n",
" \n",
" 1207231 | \n",
" 1047 | \n",
" 223179 | \n",
" Marion | \n",
" Viertler | \n",
"
\n",
" \n",
" 1207232 | \n",
" 996 | \n",
" 223179 | \n",
" Marion | \n",
" Viertler | \n",
"
\n",
" \n",
" 1207233 | \n",
" 948 | \n",
" 223179 | \n",
" Marion | \n",
" Viertler | \n",
"
\n",
" \n",
" 1207234 | \n",
" 949 | \n",
" 223179 | \n",
" Marion | \n",
" Viertler | \n",
"
\n",
" \n",
"
\n",
"
1207235 rows × 4 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"source": [
"# Keep only the rows where the player is ranked No. 1\n",
"is_rank_one = df2['ranking'].eq(1)\n",
"df3 = df2[is_rank_one]\n",
"df3"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "8l0q6noxTtFp",
"outputId": "03a07430-3c02-4c27-afce-8e9b34645d56"
},
"execution_count": 13,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" ranking player_id name_first name_last\n",
"0 1 200001 Martina Hingis\n",
"1 1 200001 Martina Hingis\n",
"2 1 200001 Martina Hingis\n",
"3 1 200001 Martina Hingis\n",
"4 1 200001 Martina Hingis\n",
"... ... ... ... ...\n",
"1199804 1 202458 Ashleigh Barty\n",
"1201123 1 202458 Ashleigh Barty\n",
"1202444 1 202458 Ashleigh Barty\n",
"1203765 1 202458 Ashleigh Barty\n",
"1205087 1 202458 Ashleigh Barty\n",
"\n",
"[1031 rows x 4 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ranking | \n",
" player_id | \n",
" name_first | \n",
" name_last | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 200001 | \n",
" Martina | \n",
" Hingis | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 200001 | \n",
" Martina | \n",
" Hingis | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 200001 | \n",
" Martina | \n",
" Hingis | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 200001 | \n",
" Martina | \n",
" Hingis | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 200001 | \n",
" Martina | \n",
" Hingis | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 1199804 | \n",
" 1 | \n",
" 202458 | \n",
" Ashleigh | \n",
" Barty | \n",
"
\n",
" \n",
" 1201123 | \n",
" 1 | \n",
" 202458 | \n",
" Ashleigh | \n",
" Barty | \n",
"
\n",
" \n",
" 1202444 | \n",
" 1 | \n",
" 202458 | \n",
" Ashleigh | \n",
" Barty | \n",
"
\n",
" \n",
" 1203765 | \n",
" 1 | \n",
" 202458 | \n",
" Ashleigh | \n",
" Barty | \n",
"
\n",
" \n",
" 1205087 | \n",
" 1 | \n",
" 202458 | \n",
" Ashleigh | \n",
" Barty | \n",
"
\n",
" \n",
"
\n",
"
1031 rows × 4 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 13
}
]
},
{
"cell_type": "code",
"source": [
"# Count the rank-1 rows per player. After count() the remaining player_id column\n",
"# no longer holds ids: it holds the number of rows in each group.\n",
"rank_one_groups = df3.groupby(['ranking', 'name_last', 'name_first'])\n",
"checktosee = rank_one_groups.count()\n",
"checktosee"
],
"metadata": {
"id": "UWULWeCpNfIq",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 708
},
"outputId": "ddbe155f-1354-4e40-82a0-7b27ed75a1a9"
},
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" player_id\n",
"ranking name_last name_first \n",
"1 Azarenka Victoria 52\n",
" Barty Ashleigh 22\n",
" Capriati Jennifer 17\n",
" Clijsters Kim 20\n",
" Davenport Lindsay 79\n",
" Halep Simona 52\n",
" Henin Justine 118\n",
" Hingis Martina 88\n",
" Ivanovic Ana 12\n",
" Jankovic Jelena 19\n",
" Kerber Angelique 32\n",
" Mauresmo Amelie 39\n",
" Muguruza Garbine 4\n",
" Osaka Naomi 21\n",
" Pliskova Karolina 7\n",
" Safina Dinara 26\n",
" Sharapova Maria 21\n",
" Williams Serena 318\n",
" Venus 11\n",
" Wozniacki Caroline 73"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" | \n",
" player_id | \n",
"
\n",
" \n",
" ranking | \n",
" name_last | \n",
" name_first | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" Azarenka | \n",
" Victoria | \n",
" 52 | \n",
"
\n",
" \n",
" Barty | \n",
" Ashleigh | \n",
" 22 | \n",
"
\n",
" \n",
" Capriati | \n",
" Jennifer | \n",
" 17 | \n",
"
\n",
" \n",
" Clijsters | \n",
" Kim | \n",
" 20 | \n",
"
\n",
" \n",
" Davenport | \n",
" Lindsay | \n",
" 79 | \n",
"
\n",
" \n",
" Halep | \n",
" Simona | \n",
" 52 | \n",
"
\n",
" \n",
" Henin | \n",
" Justine | \n",
" 118 | \n",
"
\n",
" \n",
" Hingis | \n",
" Martina | \n",
" 88 | \n",
"
\n",
" \n",
" Ivanovic | \n",
" Ana | \n",
" 12 | \n",
"
\n",
" \n",
" Jankovic | \n",
" Jelena | \n",
" 19 | \n",
"
\n",
" \n",
" Kerber | \n",
" Angelique | \n",
" 32 | \n",
"
\n",
" \n",
" Mauresmo | \n",
" Amelie | \n",
" 39 | \n",
"
\n",
" \n",
" Muguruza | \n",
" Garbine | \n",
" 4 | \n",
"
\n",
" \n",
" Osaka | \n",
" Naomi | \n",
" 21 | \n",
"
\n",
" \n",
" Pliskova | \n",
" Karolina | \n",
" 7 | \n",
"
\n",
" \n",
" Safina | \n",
" Dinara | \n",
" 26 | \n",
"
\n",
" \n",
" Sharapova | \n",
" Maria | \n",
" 21 | \n",
"
\n",
" \n",
" Williams | \n",
" Serena | \n",
" 318 | \n",
"
\n",
" \n",
" Venus | \n",
" 11 | \n",
"
\n",
" \n",
" Wozniacki | \n",
" Caroline | \n",
" 73 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 14
}
]
},
{
"cell_type": "code",
"source": [
"checktosee.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WPLMt5_EVOm1",
"outputId": "c8329db7-f8d8-4af3-b39a-7fee69ab1699"
},
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"MultiIndex: 20 entries, (1, 'Azarenka', 'Victoria') to (1, 'Wozniacki', 'Caroline')\n",
"Data columns (total 1 columns):\n",
" # Column Non-Null Count Dtype\n",
"--- ------ -------------- -----\n",
" 0 player_id 20 non-null int64\n",
"dtypes: int64(1)\n",
"memory usage: 713.0+ bytes\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"checktosee.index"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WWKLbXTeVWR4",
"outputId": "62910dcf-8981-4269-9c30-b03413b7ae33"
},
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"MultiIndex([(1, 'Azarenka', 'Victoria'),\n",
" (1, 'Barty', 'Ashleigh'),\n",
" (1, 'Capriati', 'Jennifer'),\n",
" (1, 'Clijsters', 'Kim'),\n",
" (1, 'Davenport', 'Lindsay'),\n",
" (1, 'Halep', 'Simona'),\n",
" (1, 'Henin', 'Justine'),\n",
" (1, 'Hingis', 'Martina'),\n",
" (1, 'Ivanovic', 'Ana'),\n",
" (1, 'Jankovic', 'Jelena'),\n",
" (1, 'Kerber', 'Angelique'),\n",
" (1, 'Mauresmo', 'Amelie'),\n",
" (1, 'Muguruza', 'Garbine'),\n",
" (1, 'Osaka', 'Naomi'),\n",
" (1, 'Pliskova', 'Karolina'),\n",
" (1, 'Safina', 'Dinara'),\n",
" (1, 'Sharapova', 'Maria'),\n",
" (1, 'Williams', 'Serena'),\n",
" (1, 'Williams', 'Venus'),\n",
" (1, 'Wozniacki', 'Caroline')],\n",
" names=['ranking', 'name_last', 'name_first'])"
]
},
"metadata": {},
"execution_count": 16
}
]
},
{
"cell_type": "code",
"source": [
"# Turn the (ranking, name_last, name_first) group keys back into ordinary columns.\n",
"# Guarded so that re-running this cell does not insert a spurious 'index' column.\n",
"if isinstance(checktosee.index, pd.MultiIndex):\n",
"    checktosee = checktosee.reset_index()"
],
"metadata": {
"id": "V9-TaOUkWf25"
},
"execution_count": 17,
"outputs": []
},
{
"cell_type": "code",
"source": [
"checktosee"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 677
},
"id": "0wwbi39HWgYY",
"outputId": "00465e1a-c1e1-418a-9b6e-3fb3e4bbb295"
},
"execution_count": 18,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" ranking name_last name_first player_id\n",
"0 1 Azarenka Victoria 52\n",
"1 1 Barty Ashleigh 22\n",
"2 1 Capriati Jennifer 17\n",
"3 1 Clijsters Kim 20\n",
"4 1 Davenport Lindsay 79\n",
"5 1 Halep Simona 52\n",
"6 1 Henin Justine 118\n",
"7 1 Hingis Martina 88\n",
"8 1 Ivanovic Ana 12\n",
"9 1 Jankovic Jelena 19\n",
"10 1 Kerber Angelique 32\n",
"11 1 Mauresmo Amelie 39\n",
"12 1 Muguruza Garbine 4\n",
"13 1 Osaka Naomi 21\n",
"14 1 Pliskova Karolina 7\n",
"15 1 Safina Dinara 26\n",
"16 1 Sharapova Maria 21\n",
"17 1 Williams Serena 318\n",
"18 1 Williams Venus 11\n",
"19 1 Wozniacki Caroline 73"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ranking | \n",
" name_last | \n",
" name_first | \n",
" player_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Azarenka | \n",
" Victoria | \n",
" 52 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" Barty | \n",
" Ashleigh | \n",
" 22 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" Capriati | \n",
" Jennifer | \n",
" 17 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" Clijsters | \n",
" Kim | \n",
" 20 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" Davenport | \n",
" Lindsay | \n",
" 79 | \n",
"
\n",
" \n",
" 5 | \n",
" 1 | \n",
" Halep | \n",
" Simona | \n",
" 52 | \n",
"
\n",
" \n",
" 6 | \n",
" 1 | \n",
" Henin | \n",
" Justine | \n",
" 118 | \n",
"
\n",
" \n",
" 7 | \n",
" 1 | \n",
" Hingis | \n",
" Martina | \n",
" 88 | \n",
"
\n",
" \n",
" 8 | \n",
" 1 | \n",
" Ivanovic | \n",
" Ana | \n",
" 12 | \n",
"
\n",
" \n",
" 9 | \n",
" 1 | \n",
" Jankovic | \n",
" Jelena | \n",
" 19 | \n",
"
\n",
" \n",
" 10 | \n",
" 1 | \n",
" Kerber | \n",
" Angelique | \n",
" 32 | \n",
"
\n",
" \n",
" 11 | \n",
" 1 | \n",
" Mauresmo | \n",
" Amelie | \n",
" 39 | \n",
"
\n",
" \n",
" 12 | \n",
" 1 | \n",
" Muguruza | \n",
" Garbine | \n",
" 4 | \n",
"
\n",
" \n",
" 13 | \n",
" 1 | \n",
" Osaka | \n",
" Naomi | \n",
" 21 | \n",
"
\n",
" \n",
" 14 | \n",
" 1 | \n",
" Pliskova | \n",
" Karolina | \n",
" 7 | \n",
"
\n",
" \n",
" 15 | \n",
" 1 | \n",
" Safina | \n",
" Dinara | \n",
" 26 | \n",
"
\n",
" \n",
" 16 | \n",
" 1 | \n",
" Sharapova | \n",
" Maria | \n",
" 21 | \n",
"
\n",
" \n",
" 17 | \n",
" 1 | \n",
" Williams | \n",
" Serena | \n",
" 318 | \n",
"
\n",
" \n",
" 18 | \n",
" 1 | \n",
" Williams | \n",
" Venus | \n",
" 11 | \n",
"
\n",
" \n",
" 19 | \n",
" 1 | \n",
" Wozniacki | \n",
" Caroline | \n",
" 73 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "code",
"source": [
"# Build a single 'First Last' label per player for the chart axis\n",
"checktosee['full_name'] = checktosee['name_first'].str.cat(checktosee['name_last'], sep=' ')"
],
"metadata": {
"id": "fsPmBUMSVUcv"
},
"execution_count": 19,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Order players from most to fewest rank-1 rows (the count is stored in player_id)\n",
"checktosee2 = checktosee.sort_values('player_id', ascending=False)"
],
"metadata": {
"id": "_3aNzPCsVKFZ"
},
"execution_count": 20,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"%matplotlib inline"
],
"metadata": {
"id": "1frpwunVVLYQ"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "T_HJ6UC0owBb",
"outputId": "4a891315-dfa9-499b-d528-0e11fa8aff55"
},
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Bar chart of weeks spent at No. 1, one bar per player, in the order of checktosee2\n",
"# (sorted descending earlier; its player_id column holds the per-player counts).\n",
"# Everything is drawn through the explicit fig/ax pair instead of the pyplot state machine.\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"ax.bar(checktosee2['full_name'], checktosee2['player_id'], color='blue', width=0.8)\n",
"ax.set_title('WTA the 1st Ranked Players', size=25)\n",
"ax.set_ylabel('# of Weeks as the 1st Ranked', size=15)\n",
"ax.set_xlabel('Players', size=15)\n",
"ax.tick_params(axis='x', labelrotation=82, labelsize=20)\n",
"fig.tight_layout()\n",
"\n",
"# Save before plt.show() so the file is written from the live figure on any backend.\n",
"# The target folder is on the Google Drive mounted at /content/drive.\n",
"fig.savefig('/content/drive/My Drive/Colab Notebooks/tennis/Tennis_Predicting-master/womenfirst.png', dpi=500, bbox_inches='tight')\n",
"plt.show()\n"
],
"metadata": {
"id": "5fECX41qyZU1",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 445
},
"outputId": "8301de84-9ef1-48db-cdfc-0bcb5c5de4f1"
},
"execution_count": 32,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"