{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "89317480",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Platform | \n",
" Year_of_Release | \n",
" Genre | \n",
" Publisher | \n",
" NA_Sales | \n",
" EU_Sales | \n",
" JP_Sales | \n",
" Other_Sales | \n",
" Global_Sales | \n",
" Critic_Score | \n",
" Critic_Count | \n",
" User_Score | \n",
" User_Count | \n",
" Developer | \n",
" Rating | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Wii Sports | \n",
" Wii | \n",
" 2006.0 | \n",
" Sports | \n",
" Nintendo | \n",
" 41.36 | \n",
" 28.96 | \n",
" 3.77 | \n",
" 8.45 | \n",
" 82.53 | \n",
" 76.0 | \n",
" 51.0 | \n",
" 8 | \n",
" 322.0 | \n",
" Nintendo | \n",
" E | \n",
"
\n",
" \n",
" 1 | \n",
" Super Mario Bros. | \n",
" NES | \n",
" 1985.0 | \n",
" Platform | \n",
" Nintendo | \n",
" 29.08 | \n",
" 3.58 | \n",
" 6.81 | \n",
" 0.77 | \n",
" 40.24 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" Mario Kart Wii | \n",
" Wii | \n",
" 2008.0 | \n",
" Racing | \n",
" Nintendo | \n",
" 15.68 | \n",
" 12.76 | \n",
" 3.79 | \n",
" 3.29 | \n",
" 35.52 | \n",
" 82.0 | \n",
" 73.0 | \n",
" 8.3 | \n",
" 709.0 | \n",
" Nintendo | \n",
" E | \n",
"
\n",
" \n",
" 3 | \n",
" Wii Sports Resort | \n",
" Wii | \n",
" 2009.0 | \n",
" Sports | \n",
" Nintendo | \n",
" 15.61 | \n",
" 10.93 | \n",
" 3.28 | \n",
" 2.95 | \n",
" 32.77 | \n",
" 80.0 | \n",
" 73.0 | \n",
" 8 | \n",
" 192.0 | \n",
" Nintendo | \n",
" E | \n",
"
\n",
" \n",
" 4 | \n",
" Pokemon Red/Pokemon Blue | \n",
" GB | \n",
" 1996.0 | \n",
" Role-Playing | \n",
" Nintendo | \n",
" 11.27 | \n",
" 8.89 | \n",
" 10.22 | \n",
" 1.00 | \n",
" 31.37 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Platform Year_of_Release Genre Publisher \\\n",
"0 Wii Sports Wii 2006.0 Sports Nintendo \n",
"1 Super Mario Bros. NES 1985.0 Platform Nintendo \n",
"2 Mario Kart Wii Wii 2008.0 Racing Nintendo \n",
"3 Wii Sports Resort Wii 2009.0 Sports Nintendo \n",
"4 Pokemon Red/Pokemon Blue GB 1996.0 Role-Playing Nintendo \n",
"\n",
" NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales Critic_Score \\\n",
"0 41.36 28.96 3.77 8.45 82.53 76.0 \n",
"1 29.08 3.58 6.81 0.77 40.24 NaN \n",
"2 15.68 12.76 3.79 3.29 35.52 82.0 \n",
"3 15.61 10.93 3.28 2.95 32.77 80.0 \n",
"4 11.27 8.89 10.22 1.00 31.37 NaN \n",
"\n",
" Critic_Count User_Score User_Count Developer Rating \n",
"0 51.0 8 322.0 Nintendo E \n",
"1 NaN NaN NaN NaN NaN \n",
"2 73.0 8.3 709.0 Nintendo E \n",
"3 73.0 8 192.0 Nintendo E \n",
"4 NaN NaN NaN NaN NaN "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Load the sales table; the relative path is resolved against the kernel's\n",
"# current working directory.\n",
"df = pd.read_csv(filepath_or_buffer='vgsales.csv')\n",
"\n",
"# Preview the first five rows as a quick sanity check of the parse.\n",
"df.head(n=5)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e8bbca54",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Platform | \n",
" Year_of_Release | \n",
" Genre | \n",
" Publisher | \n",
" NA_Sales | \n",
" EU_Sales | \n",
" JP_Sales | \n",
" Other_Sales | \n",
" Global_Sales | \n",
" Critic_Score | \n",
" Critic_Count | \n",
" User_Score | \n",
" User_Count | \n",
" Developer | \n",
" Rating | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Wii Sports | \n",
" Wii | \n",
" 2006.0 | \n",
" Sports | \n",
" Nintendo | \n",
" 41.36 | \n",
" 28.96 | \n",
" 3.77 | \n",
" 8.45 | \n",
" 82.53 | \n",
" 76.0 | \n",
" 51.0 | \n",
" 8 | \n",
" 322.0 | \n",
" Nintendo | \n",
" E | \n",
"
\n",
" \n",
" 1 | \n",
" Super Mario Bros. | \n",
" NES | \n",
" 1985.0 | \n",
" Platform | \n",
" Nintendo | \n",
" 29.08 | \n",
" 3.58 | \n",
" 6.81 | \n",
" 0.77 | \n",
" 40.24 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" Mario Kart Wii | \n",
" Wii | \n",
" 2008.0 | \n",
" Racing | \n",
" Nintendo | \n",
" 15.68 | \n",
" 12.76 | \n",
" 3.79 | \n",
" 3.29 | \n",
" 35.52 | \n",
" 82.0 | \n",
" 73.0 | \n",
" 8.3 | \n",
" 709.0 | \n",
" Nintendo | \n",
" E | \n",
"
\n",
" \n",
" 3 | \n",
" Wii Sports Resort | \n",
" Wii | \n",
" 2009.0 | \n",
" Sports | \n",
" Nintendo | \n",
" 15.61 | \n",
" 10.93 | \n",
" 3.28 | \n",
" 2.95 | \n",
" 32.77 | \n",
" 80.0 | \n",
" 73.0 | \n",
" 8 | \n",
" 192.0 | \n",
" Nintendo | \n",
" E | \n",
"
\n",
" \n",
" 4 | \n",
" Pokemon Red/Pokemon Blue | \n",
" GB | \n",
" 1996.0 | \n",
" Role-Playing | \n",
" Nintendo | \n",
" 11.27 | \n",
" 8.89 | \n",
" 10.22 | \n",
" 1.00 | \n",
" 31.37 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Platform Year_of_Release Genre Publisher \\\n",
"0 Wii Sports Wii 2006.0 Sports Nintendo \n",
"1 Super Mario Bros. NES 1985.0 Platform Nintendo \n",
"2 Mario Kart Wii Wii 2008.0 Racing Nintendo \n",
"3 Wii Sports Resort Wii 2009.0 Sports Nintendo \n",
"4 Pokemon Red/Pokemon Blue GB 1996.0 Role-Playing Nintendo \n",
"\n",
" NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales Critic_Score \\\n",
"0 41.36 28.96 3.77 8.45 82.53 76.0 \n",
"1 29.08 3.58 6.81 0.77 40.24 NaN \n",
"2 15.68 12.76 3.79 3.29 35.52 82.0 \n",
"3 15.61 10.93 3.28 2.95 32.77 80.0 \n",
"4 11.27 8.89 10.22 1.00 31.37 NaN \n",
"\n",
" Critic_Count User_Score User_Count Developer Rating \n",
"0 51.0 8 322.0 Nintendo E \n",
"1 NaN NaN NaN NaN NaN \n",
"2 73.0 8.3 709.0 Nintendo E \n",
"3 73.0 8 192.0 Nintendo E \n",
"4 NaN NaN NaN NaN NaN "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NOTE(review): this repeats the preview already displayed by the loading cell.\n",
"df.head(n=5)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a7ee67ff",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Platform | \n",
" Year_of_Release | \n",
" Genre | \n",
" Publisher | \n",
" NA_Sales | \n",
" EU_Sales | \n",
" JP_Sales | \n",
" Other_Sales | \n",
" Global_Sales | \n",
" Critic_Score | \n",
" Critic_Count | \n",
" User_Score | \n",
" User_Count | \n",
" Developer | \n",
" Rating | \n",
"
\n",
" \n",
" \n",
" \n",
" 16714 | \n",
" Samurai Warriors: Sanada Maru | \n",
" PS3 | \n",
" 2016.0 | \n",
" Action | \n",
" Tecmo Koei | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.01 | \n",
" 0.0 | \n",
" 0.01 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 16715 | \n",
" LMA Manager 2007 | \n",
" X360 | \n",
" 2006.0 | \n",
" Sports | \n",
" Codemasters | \n",
" 0.00 | \n",
" 0.01 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 0.01 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 16716 | \n",
" Haitaka no Psychedelica | \n",
" PSV | \n",
" 2016.0 | \n",
" Adventure | \n",
" Idea Factory | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.01 | \n",
" 0.0 | \n",
" 0.01 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 16717 | \n",
" Spirits & Spells | \n",
" GBA | \n",
" 2003.0 | \n",
" Platform | \n",
" Wanadoo | \n",
" 0.01 | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 0.01 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 16718 | \n",
" Winning Post 8 2016 | \n",
" PSV | \n",
" 2016.0 | \n",
" Simulation | \n",
" Tecmo Koei | \n",
" 0.00 | \n",
" 0.00 | \n",
" 0.01 | \n",
" 0.0 | \n",
" 0.01 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Platform Year_of_Release Genre \\\n",
"16714 Samurai Warriors: Sanada Maru PS3 2016.0 Action \n",
"16715 LMA Manager 2007 X360 2006.0 Sports \n",
"16716 Haitaka no Psychedelica PSV 2016.0 Adventure \n",
"16717 Spirits & Spells GBA 2003.0 Platform \n",
"16718 Winning Post 8 2016 PSV 2016.0 Simulation \n",
"\n",
" Publisher NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales \\\n",
"16714 Tecmo Koei 0.00 0.00 0.01 0.0 0.01 \n",
"16715 Codemasters 0.00 0.01 0.00 0.0 0.01 \n",
"16716 Idea Factory 0.00 0.00 0.01 0.0 0.01 \n",
"16717 Wanadoo 0.01 0.00 0.00 0.0 0.01 \n",
"16718 Tecmo Koei 0.00 0.00 0.01 0.0 0.01 \n",
"\n",
" Critic_Score Critic_Count User_Score User_Count Developer Rating \n",
"16714 NaN NaN NaN NaN NaN NaN \n",
"16715 NaN NaN NaN NaN NaN NaN \n",
"16716 NaN NaN NaN NaN NaN NaN \n",
"16717 NaN NaN NaN NaN NaN NaN \n",
"16718 NaN NaN NaN NaN NaN NaN "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the last five rows of the frame.\n",
"df.tail(n=5)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9fc86666",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Platform | \n",
" Year_of_Release | \n",
" Genre | \n",
" Publisher | \n",
" NA_Sales | \n",
" EU_Sales | \n",
" JP_Sales | \n",
" Other_Sales | \n",
" Global_Sales | \n",
" Critic_Score | \n",
" Critic_Count | \n",
" User_Score | \n",
" User_Count | \n",
" Developer | \n",
" Rating | \n",
"
\n",
" \n",
" \n",
" \n",
" 15145 | \n",
" Toukiden 2 | \n",
" PS3 | \n",
" 2016.0 | \n",
" Action | \n",
" Tecmo Koei | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.02 | \n",
" 0.0 | \n",
" 0.02 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Platform Year_of_Release Genre Publisher NA_Sales \\\n",
"15145 Toukiden 2 PS3 2016.0 Action Tecmo Koei 0.0 \n",
"\n",
" EU_Sales JP_Sales Other_Sales Global_Sales Critic_Score \\\n",
"15145 0.0 0.02 0.0 0.02 NaN \n",
"\n",
" Critic_Count User_Score User_Count Developer Rating \n",
"15145 NaN NaN NaN NaN NaN "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Draw one random row as a spot check. The fixed seed keeps the displayed row\n",
"# stable across Restart & Run All instead of changing on every execution.\n",
"df.sample(n=1, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "aae19a21",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(16719, 16)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (number of rows, number of columns) of the loaded frame.\n",
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "249e4d97",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Name object\n",
"Platform object\n",
"Year_of_Release float64\n",
"Genre object\n",
"Publisher object\n",
"NA_Sales float64\n",
"EU_Sales float64\n",
"JP_Sales float64\n",
"Other_Sales float64\n",
"Global_Sales float64\n",
"Critic_Score float64\n",
"Critic_Count float64\n",
"User_Score object\n",
"User_Count float64\n",
"Developer object\n",
"Rating object\n",
"dtype: object\n"
]
}
],
"source": [
"# Per-column dtypes as parsed from the CSV, stored in `data_types` and printed.\n",
"data_types = df.dtypes\n",
"print(data_types)\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b5b14228",
"metadata": {},
"outputs": [],
"source": [
"# Cast the release year to int; missing years are first replaced by 0 so the\n",
"# cast can succeed.\n",
"# NOTE(review): 0 is a placeholder, not a real year, and it distorts numeric\n",
"# summaries of this column (mean, std, min).\n",
"df['Year_of_Release'] = (\n",
"    df['Year_of_Release']\n",
"    .fillna(0)\n",
"    .astype(int)\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "84b0489f",
"metadata": {},
"outputs": [],
"source": [
"# User_Score is parsed as text (dtype object); convert it to numbers and turn\n",
"# every entry that is not numeric into NaN.\n",
"df['User_Score'] = pd.to_numeric(\n",
"    df['User_Score'],\n",
"    errors='coerce',\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e3ec58f7",
"metadata": {},
"outputs": [],
"source": [
"# Review counts become integers; a missing count is stored as 0.\n",
"for count_col in ('Critic_Count', 'User_Count'):\n",
"    df[count_col] = df[count_col].fillna(0).astype(int)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "0a75a4bb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Missing values in the dataset:\n",
"Name 2\n",
"Platform 0\n",
"Year_of_Release 0\n",
"Genre 2\n",
"Publisher 54\n",
"NA_Sales 0\n",
"EU_Sales 0\n",
"JP_Sales 0\n",
"Other_Sales 0\n",
"Global_Sales 0\n",
"Critic_Score 8582\n",
"Critic_Count 0\n",
"User_Score 9129\n",
"User_Count 0\n",
"Developer 6623\n",
"Rating 6769\n",
"dtype: int64\n"
]
}
],
"source": [
"# Count the NaN entries that remain in every column.\n",
"print(\"\\nMissing values in the dataset:\", df.isna().sum(), sep=\"\\n\")\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "09a183b2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Platform | \n",
" Year_of_Release | \n",
" Genre | \n",
" Publisher | \n",
" NA_Sales | \n",
" EU_Sales | \n",
" JP_Sales | \n",
" Other_Sales | \n",
" Global_Sales | \n",
" Critic_Score | \n",
" Critic_Count | \n",
" User_Score | \n",
" User_Count | \n",
" Developer | \n",
" Rating | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Wii Sports | \n",
" Wii | \n",
" 2006 | \n",
" Sports | \n",
" Nintendo | \n",
" 41.36 | \n",
" 28.96 | \n",
" 3.77 | \n",
" 8.45 | \n",
" 82.53 | \n",
" 76.0 | \n",
" 51 | \n",
" 8.0 | \n",
" 322 | \n",
" Nintendo | \n",
" E | \n",
"
\n",
" \n",
" 1 | \n",
" Super Mario Bros. | \n",
" NES | \n",
" 1985 | \n",
" Platform | \n",
" Nintendo | \n",
" 29.08 | \n",
" 3.58 | \n",
" 6.81 | \n",
" 0.77 | \n",
" 40.24 | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" Mario Kart Wii | \n",
" Wii | \n",
" 2008 | \n",
" Racing | \n",
" Nintendo | \n",
" 15.68 | \n",
" 12.76 | \n",
" 3.79 | \n",
" 3.29 | \n",
" 35.52 | \n",
" 82.0 | \n",
" 73 | \n",
" 8.3 | \n",
" 709 | \n",
" Nintendo | \n",
" E | \n",
"
\n",
" \n",
" 3 | \n",
" Wii Sports Resort | \n",
" Wii | \n",
" 2009 | \n",
" Sports | \n",
" Nintendo | \n",
" 15.61 | \n",
" 10.93 | \n",
" 3.28 | \n",
" 2.95 | \n",
" 32.77 | \n",
" 80.0 | \n",
" 73 | \n",
" 8.0 | \n",
" 192 | \n",
" Nintendo | \n",
" E | \n",
"
\n",
" \n",
" 4 | \n",
" Pokemon Red/Pokemon Blue | \n",
" GB | \n",
" 1996 | \n",
" Role-Playing | \n",
" Nintendo | \n",
" 11.27 | \n",
" 8.89 | \n",
" 10.22 | \n",
" 1.00 | \n",
" 31.37 | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Platform Year_of_Release Genre Publisher \\\n",
"0 Wii Sports Wii 2006 Sports Nintendo \n",
"1 Super Mario Bros. NES 1985 Platform Nintendo \n",
"2 Mario Kart Wii Wii 2008 Racing Nintendo \n",
"3 Wii Sports Resort Wii 2009 Sports Nintendo \n",
"4 Pokemon Red/Pokemon Blue GB 1996 Role-Playing Nintendo \n",
"\n",
" NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales Critic_Score \\\n",
"0 41.36 28.96 3.77 8.45 82.53 76.0 \n",
"1 29.08 3.58 6.81 0.77 40.24 NaN \n",
"2 15.68 12.76 3.79 3.29 35.52 82.0 \n",
"3 15.61 10.93 3.28 2.95 32.77 80.0 \n",
"4 11.27 8.89 10.22 1.00 31.37 NaN \n",
"\n",
" Critic_Count User_Score User_Count Developer Rating \n",
"0 51 8.0 322 Nintendo E \n",
"1 0 NaN 0 NaN NaN \n",
"2 73 8.3 709 Nintendo E \n",
"3 73 8.0 192 Nintendo E \n",
"4 0 NaN 0 NaN NaN "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Remove rows where 'Name' or 'Genre' is missing and replace missing 'Publisher'\n",
"# values with 'Unknown'.\n",
"# This is written as one chain that rebinds `df`, because\n",
"# `df['Publisher'].fillna(..., inplace=True)` mutates a temporary Series: that is\n",
"# deprecated chained assignment and leaves `df` unchanged under pandas\n",
"# Copy-on-Write.\n",
"df = (\n",
"    df\n",
"    .dropna(subset=['Name', 'Genre'])\n",
"    .assign(Publisher=lambda frame: frame['Publisher'].fillna('Unknown'))\n",
")\n",
"\n",
"# Display the DataFrame to confirm changes\n",
"df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "2829d272",
"metadata": {},
"outputs": [],
"source": [
"# User_Score is on a 0-10 scale while Critic_Score is on a 0-100 scale.\n",
"# Subtracting them directly yields an offset of about -63 and imputed user scores\n",
"# far outside 0-10, so both are compared on a common 0-10 scale instead.\n",
"CRITIC_TO_USER_SCALE = 10  # Critic_Score / 10 is on the User_Score scale\n",
"\n",
"df['User_Score'] = pd.to_numeric(df['User_Score'], errors='coerce')\n",
"\n",
"# Calculate the average (user - critic) difference on the 0-10 scale where both\n",
"# scores are present\n",
"valid_scores = df.dropna(subset=['User_Score', 'Critic_Score'])\n",
"average_diff = (\n",
"    valid_scores['User_Score'] - valid_scores['Critic_Score'] / CRITIC_TO_USER_SCALE\n",
").mean()\n",
"\n",
"# Impute missing User_Scores from the rescaled Critic_Score, kept within 0-10\n",
"missing_user = df['User_Score'].isnull() & df['Critic_Score'].notnull()\n",
"df.loc[missing_user, 'User_Score'] = (\n",
"    df.loc[missing_user, 'Critic_Score'] / CRITIC_TO_USER_SCALE + average_diff\n",
").clip(lower=0, upper=10)\n",
"\n",
"# Impute missing Critic_Scores from the User_Score, rescaled back to 0-100 and\n",
"# kept within that range\n",
"missing_critic = df['Critic_Score'].isnull() & df['User_Score'].notnull()\n",
"df.loc[missing_critic, 'Critic_Score'] = (\n",
"    (df.loc[missing_critic, 'User_Score'] - average_diff) * CRITIC_TO_USER_SCALE\n",
").clip(lower=0, upper=100)\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d5e648e9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Statistical details of the dataset:\n",
" Year_of_Release NA_Sales EU_Sales JP_Sales \\\n",
"count 16717.000000 16717.000000 16717.000000 16717.000000 \n",
"mean 1974.201771 0.263255 0.145010 0.077610 \n",
"std 252.545637 0.813475 0.503303 0.308836 \n",
"min 0.000000 0.000000 0.000000 0.000000 \n",
"25% 2003.000000 0.000000 0.000000 0.000000 \n",
"50% 2007.000000 0.080000 0.020000 0.000000 \n",
"75% 2010.000000 0.240000 0.110000 0.040000 \n",
"max 2020.000000 41.360000 28.960000 10.220000 \n",
"\n",
" Other_Sales Global_Sales Critic_Score Critic_Count User_Score \\\n",
"count 16717.000000 16717.000000 8710.000000 16717.000000 8710.000000 \n",
"mean 0.047333 0.533462 69.002023 12.831130 5.934629 \n",
"std 0.186721 1.547956 13.481816 18.680383 5.311803 \n",
"min 0.000000 0.010000 13.000000 0.000000 -40.067393 \n",
"25% 0.000000 0.060000 61.000000 0.000000 5.900000 \n",
"50% 0.010000 0.170000 70.267393 0.000000 7.300000 \n",
"75% 0.030000 0.470000 79.000000 21.000000 8.200000 \n",
"max 10.570000 82.530000 98.000000 113.000000 26.932607 \n",
"\n",
" User_Count \n",
"count 16717.000000 \n",
"mean 73.657056 \n",
"std 386.717446 \n",
"min 0.000000 \n",
"25% 0.000000 \n",
"50% 0.000000 \n",
"75% 20.000000 \n",
"max 10665.000000 \n"
]
}
],
"source": [
"# Summary statistics of the numeric columns.\n",
"print(\"\\nStatistical details of the dataset:\", df.describe(), sep=\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "88b6e4d3",
"metadata": {},
"outputs": [],
"source": [
"# Normalize Critic_Score to be out of 10\n",
"df['Normalized_Critic_Score'] = df['Critic_Score'] / 10\n",
"\n",
"# Zero-filled temporaries used for the calculation only. The stored score columns\n",
"# keep their NaNs, so a missing score is not later mistaken for a real score of 0\n",
"# and does not drag down the column statistics.\n",
"critic_votes = df['Critic_Count'].fillna(0)\n",
"user_votes = df['User_Count'].fillna(0)\n",
"critic_points = df['Normalized_Critic_Score'].fillna(0) * critic_votes\n",
"user_points = df['User_Score'].fillna(0) * user_votes\n",
"total_votes = critic_votes + user_votes\n",
"\n",
"# Calculate the weighted score: the mean of both scores weighted by their review\n",
"# counts, computed column-wise instead of row by row. Rows without any reviews\n",
"# get 0; `where` turns a zero denominator into NaN so nothing is divided by zero.\n",
"df['Weighted_Rating_Score'] = (\n",
"    (critic_points + user_points) / total_votes.where(total_votes > 0)\n",
").fillna(0)\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "7c77bc87",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Statistical details of the dataset (excluding 'Year_of_Release'):\n",
" NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales \\\n",
"count 16717.000000 16717.000000 16717.000000 16717.000000 16717.000000 \n",
"mean 0.263255 0.145010 0.077610 0.047333 0.533462 \n",
"std 0.813475 0.503303 0.308836 0.186721 1.547956 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.010000 \n",
"25% 0.000000 0.000000 0.000000 0.000000 0.060000 \n",
"50% 0.080000 0.020000 0.000000 0.010000 0.170000 \n",
"75% 0.240000 0.110000 0.040000 0.030000 0.470000 \n",
"max 41.360000 28.960000 10.220000 10.570000 82.530000 \n",
"\n",
" Critic_Score Critic_Count User_Score User_Count \\\n",
"count 8710.000000 16717.000000 16717.000000 16717.000000 \n",
"mean 69.002023 12.831130 3.092099 73.657056 \n",
"std 13.481816 18.680383 4.846648 386.717446 \n",
"min 13.000000 0.000000 -40.067393 0.000000 \n",
"25% 61.000000 0.000000 0.000000 0.000000 \n",
"50% 70.267393 0.000000 0.000000 0.000000 \n",
"75% 79.000000 21.000000 7.400000 20.000000 \n",
"max 98.000000 113.000000 26.932607 10665.000000 \n",
"\n",
" Normalized_Critic_Score Weighted_Rating_Score \n",
"count 16717.000000 16717.000000 \n",
"mean 3.595188 3.580325 \n",
"std 3.581874 3.573915 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 4.200000 4.000000 \n",
"75% 7.100000 7.173810 \n",
"max 9.800000 9.700000 \n"
]
}
],
"source": [
"# Summarize every numeric column except 'Year_of_Release'.\n",
"statistical_details = df.drop('Year_of_Release', axis=1).describe()\n",
"\n",
"# Heading and summary table are emitted by a single print call.\n",
"print(\n",
"    \"\\nStatistical details of the dataset (excluding 'Year_of_Release'):\",\n",
"    statistical_details,\n",
"    sep=\"\\n\",\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "d0d9a491",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Number of games per platform:\n",
"PS2 2161\n",
"DS 2152\n",
"PS3 1331\n",
"Wii 1320\n",
"X360 1262\n",
"PSP 1209\n",
"PS 1197\n",
"PC 974\n",
"XB 824\n",
"GBA 822\n",
"GC 556\n",
"3DS 520\n",
"PSV 432\n",
"PS4 393\n",
"N64 319\n",
"XOne 247\n",
"SNES 239\n",
"SAT 173\n",
"WiiU 147\n",
"2600 133\n",
"NES 98\n",
"GB 98\n",
"DC 52\n",
"GEN 27\n",
"NG 12\n",
"SCD 6\n",
"WS 6\n",
"3DO 3\n",
"TG16 2\n",
"GG 1\n",
"PCFX 1\n",
"Name: Platform, dtype: int64\n"
]
}
],
"source": [
"# Number of rows per platform, most frequent first.\n",
"platform_counts = df['Platform'].value_counts()\n",
"print(\"\\nNumber of games per platform:\", platform_counts, sep=\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "5b880db7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Number of games per genre:\n",
"Action 3370\n",
"Sports 2348\n",
"Misc 1750\n",
"Role-Playing 1500\n",
"Shooter 1323\n",
"Adventure 1303\n",
"Racing 1249\n",
"Platform 888\n",
"Simulation 874\n",
"Fighting 849\n",
"Strategy 683\n",
"Puzzle 580\n",
"Name: Genre, dtype: int64\n"
]
}
],
"source": [
"# Number of rows per genre, most frequent first.\n",
"genre_counts = df['Genre'].value_counts()\n",
"print(\"\\nNumber of games per genre:\", genre_counts, sep=\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "c808a1fe",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"