{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## COLLABORATIVE FILTERING ON BOOKS "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installing Libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from numpy import int64\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"import requests\n",
"import IPython.display as Disp\n",
"import plotly.express as px\n",
"\n",
"import sklearn\n",
"from IPython.core.display import display,HTML\n",
"from sklearn.decomposition import TruncatedSVD"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Data Reading "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Books data reading\n",
"book=pd.read_csv(\"C:Books.txt\") "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#Rating data reading \n",
"rating=pd.read_csv(\"C:Ratings.txt\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((10000, 23), (162604, 3))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"book.shape,rating.shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" book_id | \n",
" goodreads_book_id | \n",
" best_book_id | \n",
" work_id | \n",
" books_count | \n",
" isbn | \n",
" isbn13 | \n",
" authors | \n",
" original_publication_year | \n",
" original_title | \n",
" title | \n",
" language_code | \n",
" average_rating | \n",
" ratings_count | \n",
" work_ratings_count | \n",
" work_text_reviews_count | \n",
" ratings_1 | \n",
" ratings_2 | \n",
" ratings_3 | \n",
" ratings_4 | \n",
" ratings_5 | \n",
" image_url | \n",
" small_image_url | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 2767052 | \n",
" 2767052 | \n",
" 2792775 | \n",
" 272 | \n",
" 439023483 | \n",
" 9.780439e+12 | \n",
" Suzanne Collins | \n",
" 2008.0 | \n",
" The Hunger Games | \n",
" The Hunger Games (The Hunger Games, #1) | \n",
" eng | \n",
" 4.34 | \n",
" 4780653 | \n",
" 4942365 | \n",
" 155254 | \n",
" 66715 | \n",
" 127936 | \n",
" 560092 | \n",
" 1481305 | \n",
" 2706317 | \n",
" https://images.gr-assets.com/books/1447303603m... | \n",
" https://images.gr-assets.com/books/1447303603s... | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 3 | \n",
" 3 | \n",
" 4640799 | \n",
" 491 | \n",
" 439554934 | \n",
" 9.780440e+12 | \n",
" J.K. Rowling, Mary GrandPré | \n",
" 1997.0 | \n",
" Harry Potter and the Philosopher's Stone | \n",
" Harry Potter and the Sorcerer's Stone (Harry P... | \n",
" eng | \n",
" 4.44 | \n",
" 4602479 | \n",
" 4800065 | \n",
" 75867 | \n",
" 75504 | \n",
" 101676 | \n",
" 455024 | \n",
" 1156318 | \n",
" 3011543 | \n",
" https://images.gr-assets.com/books/1474154022m... | \n",
" https://images.gr-assets.com/books/1474154022s... | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 41865 | \n",
" 41865 | \n",
" 3212258 | \n",
" 226 | \n",
" 316015849 | \n",
" 9.780316e+12 | \n",
" Stephenie Meyer | \n",
" 2005.0 | \n",
" Twilight | \n",
" Twilight (Twilight, #1) | \n",
" en-US | \n",
" 3.57 | \n",
" 3866839 | \n",
" 3916824 | \n",
" 95009 | \n",
" 456191 | \n",
" 436802 | \n",
" 793319 | \n",
" 875073 | \n",
" 1355439 | \n",
" https://images.gr-assets.com/books/1361039443m... | \n",
" https://images.gr-assets.com/books/1361039443s... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" book_id goodreads_book_id best_book_id work_id books_count isbn \\\n",
"0 1 2767052 2767052 2792775 272 439023483 \n",
"1 2 3 3 4640799 491 439554934 \n",
"2 3 41865 41865 3212258 226 316015849 \n",
"\n",
" isbn13 authors original_publication_year \\\n",
"0 9.780439e+12 Suzanne Collins 2008.0 \n",
"1 9.780440e+12 J.K. Rowling, Mary GrandPré 1997.0 \n",
"2 9.780316e+12 Stephenie Meyer 2005.0 \n",
"\n",
" original_title \\\n",
"0 The Hunger Games \n",
"1 Harry Potter and the Philosopher's Stone \n",
"2 Twilight \n",
"\n",
" title language_code \\\n",
"0 The Hunger Games (The Hunger Games, #1) eng \n",
"1 Harry Potter and the Sorcerer's Stone (Harry P... eng \n",
"2 Twilight (Twilight, #1) en-US \n",
"\n",
" average_rating ratings_count work_ratings_count work_text_reviews_count \\\n",
"0 4.34 4780653 4942365 155254 \n",
"1 4.44 4602479 4800065 75867 \n",
"2 3.57 3866839 3916824 95009 \n",
"\n",
" ratings_1 ratings_2 ratings_3 ratings_4 ratings_5 \\\n",
"0 66715 127936 560092 1481305 2706317 \n",
"1 75504 101676 455024 1156318 3011543 \n",
"2 456191 436802 793319 875073 1355439 \n",
"\n",
" image_url \\\n",
"0 https://images.gr-assets.com/books/1447303603m... \n",
"1 https://images.gr-assets.com/books/1474154022m... \n",
"2 https://images.gr-assets.com/books/1361039443m... \n",
"\n",
" small_image_url \n",
"0 https://images.gr-assets.com/books/1447303603s... \n",
"1 https://images.gr-assets.com/books/1474154022s... \n",
"2 https://images.gr-assets.com/books/1361039443s... "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.set_option('display.max_columns',100)\n",
"book.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user_id | \n",
" book_id | \n",
" rating | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 258 | \n",
" 5 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 4081 | \n",
" 4 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" 260 | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user_id book_id rating\n",
"0 1 258 5\n",
"1 2 4081 4\n",
"2 2 260 5"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rating.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Data Merging"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Data merging done on common column i.e. book id\n",
"df=rating.merge(book,on='book_id')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user_id | \n",
" book_id | \n",
" rating | \n",
" goodreads_book_id | \n",
" best_book_id | \n",
" work_id | \n",
" books_count | \n",
" isbn | \n",
" isbn13 | \n",
" authors | \n",
" original_publication_year | \n",
" original_title | \n",
" title | \n",
" language_code | \n",
" average_rating | \n",
" ratings_count | \n",
" work_ratings_count | \n",
" work_text_reviews_count | \n",
" ratings_1 | \n",
" ratings_2 | \n",
" ratings_3 | \n",
" ratings_4 | \n",
" ratings_5 | \n",
" image_url | \n",
" small_image_url | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 258 | \n",
" 5 | \n",
" 1232 | \n",
" 1232 | \n",
" 3209783 | \n",
" 279 | \n",
" 143034901 | \n",
" 9.780143e+12 | \n",
" Carlos Ruiz Zafón, Lucia Graves | \n",
" 2001.0 | \n",
" La sombra del viento | \n",
" The Shadow of the Wind (The Cemetery of Forgot... | \n",
" eng | \n",
" 4.24 | \n",
" 263685 | \n",
" 317554 | \n",
" 24652 | \n",
" 4789 | \n",
" 11769 | \n",
" 42214 | \n",
" 101612 | \n",
" 157170 | \n",
" https://images.gr-assets.com/books/1344545047m... | \n",
" https://images.gr-assets.com/books/1344545047s... | \n",
"
\n",
" \n",
" | 1 | \n",
" 11 | \n",
" 258 | \n",
" 3 | \n",
" 1232 | \n",
" 1232 | \n",
" 3209783 | \n",
" 279 | \n",
" 143034901 | \n",
" 9.780143e+12 | \n",
" Carlos Ruiz Zafón, Lucia Graves | \n",
" 2001.0 | \n",
" La sombra del viento | \n",
" The Shadow of the Wind (The Cemetery of Forgot... | \n",
" eng | \n",
" 4.24 | \n",
" 263685 | \n",
" 317554 | \n",
" 24652 | \n",
" 4789 | \n",
" 11769 | \n",
" 42214 | \n",
" 101612 | \n",
" 157170 | \n",
" https://images.gr-assets.com/books/1344545047m... | \n",
" https://images.gr-assets.com/books/1344545047s... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user_id book_id rating goodreads_book_id best_book_id work_id \\\n",
"0 1 258 5 1232 1232 3209783 \n",
"1 11 258 3 1232 1232 3209783 \n",
"\n",
" books_count isbn isbn13 authors \\\n",
"0 279 143034901 9.780143e+12 Carlos Ruiz Zafón, Lucia Graves \n",
"1 279 143034901 9.780143e+12 Carlos Ruiz Zafón, Lucia Graves \n",
"\n",
" original_publication_year original_title \\\n",
"0 2001.0 La sombra del viento \n",
"1 2001.0 La sombra del viento \n",
"\n",
" title language_code \\\n",
"0 The Shadow of the Wind (The Cemetery of Forgot... eng \n",
"1 The Shadow of the Wind (The Cemetery of Forgot... eng \n",
"\n",
" average_rating ratings_count work_ratings_count work_text_reviews_count \\\n",
"0 4.24 263685 317554 24652 \n",
"1 4.24 263685 317554 24652 \n",
"\n",
" ratings_1 ratings_2 ratings_3 ratings_4 ratings_5 \\\n",
"0 4789 11769 42214 101612 157170 \n",
"1 4789 11769 42214 101612 157170 \n",
"\n",
" image_url \\\n",
"0 https://images.gr-assets.com/books/1344545047m... \n",
"1 https://images.gr-assets.com/books/1344545047m... \n",
"\n",
" small_image_url \n",
"0 https://images.gr-assets.com/books/1344545047s... \n",
"1 https://images.gr-assets.com/books/1344545047s... "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user_id | \n",
" book_id | \n",
" rating | \n",
" goodreads_book_id | \n",
" best_book_id | \n",
" work_id | \n",
" books_count | \n",
" isbn13 | \n",
" original_publication_year | \n",
" average_rating | \n",
" ratings_count | \n",
" work_ratings_count | \n",
" work_text_reviews_count | \n",
" ratings_1 | \n",
" ratings_2 | \n",
" ratings_3 | \n",
" ratings_4 | \n",
" ratings_5 | \n",
"
\n",
" \n",
" \n",
" \n",
" | count | \n",
" 162604.000000 | \n",
" 162604.000000 | \n",
" 162604.000000 | \n",
" 1.626040e+05 | \n",
" 1.626040e+05 | \n",
" 1.626040e+05 | \n",
" 162604.000000 | \n",
" 1.607660e+05 | \n",
" 162579.000000 | \n",
" 162604.000000 | \n",
" 1.626040e+05 | \n",
" 1.626040e+05 | \n",
" 162604.000000 | \n",
" 162604.00000 | \n",
" 162604.000000 | \n",
" 162604.000000 | \n",
" 1.626040e+05 | \n",
" 1.626040e+05 | \n",
"
\n",
" \n",
" | mean | \n",
" 2085.741765 | \n",
" 1772.377119 | \n",
" 3.813184 | \n",
" 1.333508e+05 | \n",
" 4.646940e+05 | \n",
" 2.359180e+06 | \n",
" 305.715032 | \n",
" 9.776478e+12 | \n",
" 1945.111823 | \n",
" 3.974965 | \n",
" 4.176416e+05 | \n",
" 4.464290e+05 | \n",
" 10714.792059 | \n",
" 11066.13202 | \n",
" 22563.078842 | \n",
" 77512.512386 | \n",
" 1.404566e+05 | \n",
" 1.948307e+05 | \n",
"
\n",
" \n",
" | std | \n",
" 1212.894799 | \n",
" 2361.275403 | \n",
" 1.001269 | \n",
" 9.495106e+05 | \n",
" 2.865794e+06 | \n",
" 4.224005e+06 | \n",
" 503.339815 | \n",
" 1.988365e+11 | \n",
" 220.054157 | \n",
" 0.254314 | \n",
" 6.719332e+05 | \n",
" 7.053673e+05 | \n",
" 14638.133223 | \n",
" 22737.33095 | \n",
" 37251.600050 | \n",
" 112010.698268 | \n",
" 2.066473e+05 | \n",
" 3.699612e+05 | \n",
"
\n",
" \n",
" | min | \n",
" 1.000000 | \n",
" 2.000000 | \n",
" 1.000000 | \n",
" 1.000000e+00 | \n",
" 1.000000e+00 | \n",
" 8.700000e+01 | \n",
" 1.000000 | \n",
" 1.951703e+08 | \n",
" -1750.000000 | \n",
" 2.470000 | \n",
" 2.716000e+03 | \n",
" 5.510000e+03 | \n",
" 17.000000 | \n",
" 11.00000 | \n",
" 30.000000 | \n",
" 323.000000 | \n",
" 7.500000e+02 | \n",
" 7.810000e+02 | \n",
"
\n",
" \n",
" | 25% | \n",
" 1037.000000 | \n",
" 136.000000 | \n",
" 3.000000 | \n",
" 5.211000e+03 | \n",
" 5.348000e+03 | \n",
" 9.007640e+05 | \n",
" 60.000000 | \n",
" 9.780307e+12 | \n",
" 1955.000000 | \n",
" 3.820000 | \n",
" 4.006300e+04 | \n",
" 4.421500e+04 | \n",
" 1587.000000 | \n",
" 682.00000 | \n",
" 2227.000000 | \n",
" 9745.000000 | \n",
" 1.580200e+04 | \n",
" 1.414800e+04 | \n",
"
\n",
" \n",
" | 50% | \n",
" 2048.000000 | \n",
" 680.000000 | \n",
" 4.000000 | \n",
" 1.493500e+04 | \n",
" 1.588100e+04 | \n",
" 1.758256e+06 | \n",
" 114.000000 | \n",
" 9.780440e+12 | \n",
" 1989.000000 | \n",
" 3.970000 | \n",
" 1.310040e+05 | \n",
" 1.423260e+05 | \n",
" 4587.000000 | \n",
" 2631.00000 | \n",
" 7716.000000 | \n",
" 29992.000000 | \n",
" 4.949300e+04 | \n",
" 4.757200e+04 | \n",
"
\n",
" \n",
" | 75% | \n",
" 3101.000000 | \n",
" 2465.250000 | \n",
" 5.000000 | \n",
" 5.203600e+04 | \n",
" 6.496000e+04 | \n",
" 2.977639e+06 | \n",
" 283.000000 | \n",
" 9.780685e+12 | \n",
" 2000.000000 | \n",
" 4.140000 | \n",
" 4.698360e+05 | \n",
" 5.105820e+05 | \n",
" 13050.000000 | \n",
" 10381.00000 | \n",
" 24094.000000 | \n",
" 93427.000000 | \n",
" 1.654770e+05 | \n",
" 1.875690e+05 | \n",
"
\n",
" \n",
" | max | \n",
" 5518.000000 | \n",
" 10000.000000 | \n",
" 5.000000 | \n",
" 1.974776e+07 | \n",
" 3.360215e+07 | \n",
" 5.639960e+07 | \n",
" 3455.000000 | \n",
" 9.790008e+12 | \n",
" 2014.000000 | \n",
" 4.820000 | \n",
" 4.602479e+06 | \n",
" 4.800065e+06 | \n",
" 95009.000000 | \n",
" 456191.00000 | \n",
" 436802.000000 | \n",
" 793319.000000 | \n",
" 1.156318e+06 | \n",
" 3.011543e+06 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user_id book_id rating goodreads_book_id \\\n",
"count 162604.000000 162604.000000 162604.000000 1.626040e+05 \n",
"mean 2085.741765 1772.377119 3.813184 1.333508e+05 \n",
"std 1212.894799 2361.275403 1.001269 9.495106e+05 \n",
"min 1.000000 2.000000 1.000000 1.000000e+00 \n",
"25% 1037.000000 136.000000 3.000000 5.211000e+03 \n",
"50% 2048.000000 680.000000 4.000000 1.493500e+04 \n",
"75% 3101.000000 2465.250000 5.000000 5.203600e+04 \n",
"max 5518.000000 10000.000000 5.000000 1.974776e+07 \n",
"\n",
" best_book_id work_id books_count isbn13 \\\n",
"count 1.626040e+05 1.626040e+05 162604.000000 1.607660e+05 \n",
"mean 4.646940e+05 2.359180e+06 305.715032 9.776478e+12 \n",
"std 2.865794e+06 4.224005e+06 503.339815 1.988365e+11 \n",
"min 1.000000e+00 8.700000e+01 1.000000 1.951703e+08 \n",
"25% 5.348000e+03 9.007640e+05 60.000000 9.780307e+12 \n",
"50% 1.588100e+04 1.758256e+06 114.000000 9.780440e+12 \n",
"75% 6.496000e+04 2.977639e+06 283.000000 9.780685e+12 \n",
"max 3.360215e+07 5.639960e+07 3455.000000 9.790008e+12 \n",
"\n",
" original_publication_year average_rating ratings_count \\\n",
"count 162579.000000 162604.000000 1.626040e+05 \n",
"mean 1945.111823 3.974965 4.176416e+05 \n",
"std 220.054157 0.254314 6.719332e+05 \n",
"min -1750.000000 2.470000 2.716000e+03 \n",
"25% 1955.000000 3.820000 4.006300e+04 \n",
"50% 1989.000000 3.970000 1.310040e+05 \n",
"75% 2000.000000 4.140000 4.698360e+05 \n",
"max 2014.000000 4.820000 4.602479e+06 \n",
"\n",
" work_ratings_count work_text_reviews_count ratings_1 \\\n",
"count 1.626040e+05 162604.000000 162604.00000 \n",
"mean 4.464290e+05 10714.792059 11066.13202 \n",
"std 7.053673e+05 14638.133223 22737.33095 \n",
"min 5.510000e+03 17.000000 11.00000 \n",
"25% 4.421500e+04 1587.000000 682.00000 \n",
"50% 1.423260e+05 4587.000000 2631.00000 \n",
"75% 5.105820e+05 13050.000000 10381.00000 \n",
"max 4.800065e+06 95009.000000 456191.00000 \n",
"\n",
" ratings_2 ratings_3 ratings_4 ratings_5 \n",
"count 162604.000000 162604.000000 1.626040e+05 1.626040e+05 \n",
"mean 22563.078842 77512.512386 1.404566e+05 1.948307e+05 \n",
"std 37251.600050 112010.698268 2.066473e+05 3.699612e+05 \n",
"min 30.000000 323.000000 7.500000e+02 7.810000e+02 \n",
"25% 2227.000000 9745.000000 1.580200e+04 1.414800e+04 \n",
"50% 7716.000000 29992.000000 4.949300e+04 4.757200e+04 \n",
"75% 24094.000000 93427.000000 1.654770e+05 1.875690e+05 \n",
"max 436802.000000 793319.000000 1.156318e+06 3.011543e+06 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Int64Index: 162604 entries, 0 to 162603\n",
"Data columns (total 25 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 user_id 162604 non-null int64 \n",
" 1 book_id 162604 non-null int64 \n",
" 2 rating 162604 non-null int64 \n",
" 3 goodreads_book_id 162604 non-null int64 \n",
" 4 best_book_id 162604 non-null int64 \n",
" 5 work_id 162604 non-null int64 \n",
" 6 books_count 162604 non-null int64 \n",
" 7 isbn 160413 non-null object \n",
" 8 isbn13 160766 non-null float64\n",
" 9 authors 162604 non-null object \n",
" 10 original_publication_year 162579 non-null float64\n",
" 11 original_title 160786 non-null object \n",
" 12 title 162604 non-null object \n",
" 13 language_code 151898 non-null object \n",
" 14 average_rating 162604 non-null float64\n",
" 15 ratings_count 162604 non-null int64 \n",
" 16 work_ratings_count 162604 non-null int64 \n",
" 17 work_text_reviews_count 162604 non-null int64 \n",
" 18 ratings_1 162604 non-null int64 \n",
" 19 ratings_2 162604 non-null int64 \n",
" 20 ratings_3 162604 non-null int64 \n",
" 21 ratings_4 162604 non-null int64 \n",
" 22 ratings_5 162604 non-null int64 \n",
" 23 image_url 162604 non-null object \n",
" 24 small_image_url 162604 non-null object \n",
"dtypes: float64(3), int64(15), object(7)\n",
"memory usage: 32.3+ MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Data Cleaning "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"user_id 0\n",
"book_id 0\n",
"rating 0\n",
"goodreads_book_id 0\n",
"best_book_id 0\n",
"work_id 0\n",
"books_count 0\n",
"isbn 2191\n",
"isbn13 1838\n",
"authors 0\n",
"original_publication_year 25\n",
"original_title 1818\n",
"title 0\n",
"language_code 10706\n",
"average_rating 0\n",
"ratings_count 0\n",
"work_ratings_count 0\n",
"work_text_reviews_count 0\n",
"ratings_1 0\n",
"ratings_2 0\n",
"ratings_3 0\n",
"ratings_4 0\n",
"ratings_5 0\n",
"image_url 0\n",
"small_image_url 0\n",
"dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"#Filling null values of original_title with Na and 0\n",
"df['original_title']=df['original_title'].fillna('Na') \n",
"df['original_publication_year']=df['original_publication_year'].fillna(0)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"#Since, all columns of data are not required. So, extracting the columns that are only necessary\n",
"dfuse=df[['user_id','book_id','rating','books_count','original_publication_year','average_rating','ratings_count','title','original_title','authors','image_url']]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user_id | \n",
" book_id | \n",
" rating | \n",
" books_count | \n",
" original_publication_year | \n",
" average_rating | \n",
" ratings_count | \n",
" title | \n",
" original_title | \n",
" authors | \n",
" image_url | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 258 | \n",
" 5 | \n",
" 279 | \n",
" 2001.0 | \n",
" 4.24 | \n",
" 263685 | \n",
" The Shadow of the Wind (The Cemetery of Forgot... | \n",
" La sombra del viento | \n",
" Carlos Ruiz Zafón, Lucia Graves | \n",
" https://images.gr-assets.com/books/1344545047m... | \n",
"
\n",
" \n",
" | 1 | \n",
" 11 | \n",
" 258 | \n",
" 3 | \n",
" 279 | \n",
" 2001.0 | \n",
" 4.24 | \n",
" 263685 | \n",
" The Shadow of the Wind (The Cemetery of Forgot... | \n",
" La sombra del viento | \n",
" Carlos Ruiz Zafón, Lucia Graves | \n",
" https://images.gr-assets.com/books/1344545047m... | \n",
"
\n",
" \n",
" | 2 | \n",
" 143 | \n",
" 258 | \n",
" 4 | \n",
" 279 | \n",
" 2001.0 | \n",
" 4.24 | \n",
" 263685 | \n",
" The Shadow of the Wind (The Cemetery of Forgot... | \n",
" La sombra del viento | \n",
" Carlos Ruiz Zafón, Lucia Graves | \n",
" https://images.gr-assets.com/books/1344545047m... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user_id book_id rating books_count original_publication_year \\\n",
"0 1 258 5 279 2001.0 \n",
"1 11 258 3 279 2001.0 \n",
"2 143 258 4 279 2001.0 \n",
"\n",
" average_rating ratings_count \\\n",
"0 4.24 263685 \n",
"1 4.24 263685 \n",
"2 4.24 263685 \n",
"\n",
" title original_title \\\n",
"0 The Shadow of the Wind (The Cemetery of Forgot... La sombra del viento \n",
"1 The Shadow of the Wind (The Cemetery of Forgot... La sombra del viento \n",
"2 The Shadow of the Wind (The Cemetery of Forgot... La sombra del viento \n",
"\n",
" authors \\\n",
"0 Carlos Ruiz Zafón, Lucia Graves \n",
"1 Carlos Ruiz Zafón, Lucia Graves \n",
"2 Carlos Ruiz Zafón, Lucia Graves \n",
"\n",
" image_url \n",
"0 https://images.gr-assets.com/books/1344545047m... \n",
"1 https://images.gr-assets.com/books/1344545047m... \n",
"2 https://images.gr-assets.com/books/1344545047m... "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfuse.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Visualisation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q1 Maximum Count of Ratings are of ?"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"alignmentgroup": "True",
"hovertemplate": "variable=book_id
rating=%{x}
value=%{y}",
"legendgroup": "book_id",
"marker": {
"color": "green"
},
"name": "book_id",
"offsetgroup": "book_id",
"orientation": "v",
"showlegend": true,
"textposition": "auto",
"type": "bar",
"x": [
1,
2,
3,
4,
5
],
"xaxis": "x",
"y": [
3633,
11948,
41892,
58821,
46310
],
"yaxis": "y"
}
],
"layout": {
"barmode": "relative",
"height": 500,
"legend": {
"title": {
"text": "variable"
},
"tracegroupgap": 0
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Plot of Rating with Books Count"
},
"width": 700,
"xaxis": {
"anchor": "y",
"domain": [
0,
1
],
"title": {
"text": "rating"
}
},
"yaxis": {
"anchor": "x",
"domain": [
0,
1
],
"title": {
"text": "value"
}
}
}
},
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"px.bar(dfuse.groupby('rating')['book_id'].count(), title='Plot of Rating with Books Count',color_discrete_sequence =['green']*3, height=500, width=700)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As shown here, maximum rating of 4 are given 58.821K times where rating 5 are given 46.31K. Least rating given is 1 to only 3633. This shows given in data are good books since rating 3 are 4 times given than rating 2."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q2 Top 5 Books which are Rated in High Count ?"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"alignmentgroup": "True",
"hovertemplate": "variable=rating
book_id=%{x}
value=%{y}",
"legendgroup": "rating",
"marker": {
"color": "light blue"
},
"name": "rating",
"offsetgroup": "rating",
"orientation": "v",
"showlegend": true,
"textposition": "auto",
"type": "bar",
"x": [
26,
2,
4,
18,
8
],
"xaxis": "x",
"y": [
1010,
922,
922,
883,
883
],
"yaxis": "y"
}
],
"layout": {
"barmode": "relative",
"height": 500,
"legend": {
"title": {
"text": "variable"
},
"tracegroupgap": 0
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Plot of Top 5 Highly Rated in Counts"
},
"width": 700,
"xaxis": {
"anchor": "y",
"domain": [
0,
1
],
"title": {
"text": "book_id"
}
},
"yaxis": {
"anchor": "x",
"domain": [
0,
1
],
"title": {
"text": "value"
}
}
}
},
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the five book_ids with the highest number of ratings.\n",
"px.bar(\n",
"    dfuse.groupby('book_id')['rating'].count().sort_values(ascending=False).nlargest(5),\n",
"    title='Plot of Top 5 Highly Rated in Counts',\n",
"    # CSS colour names contain no spaces: 'lightblue' is valid, 'light blue' is not\n",
"    # and is not recognised by the browser-side renderer.\n",
"    color_discrete_sequence=['lightblue'] * 3,\n",
"    height=500,\n",
"    width=700,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As shown, book id 26 has the highest rating count (1010), followed by book ids 2 and 4 with 922 ratings each."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q3 Which Are the Top-Rated Books Based on Average Rating?"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"alignmentgroup": "True",
"hovertemplate": "variable=average_rating<br>title=%{x}<br>value=%{y}<extra></extra>",
"legendgroup": "average_rating",
"marker": {
"color": "red"
},
"name": "average_rating",
"offsetgroup": "average_rating",
"orientation": "v",
"showlegend": true,
"textposition": "auto",
"type": "bar",
"x": [
"The Complete Calvin and Hobbes",
"Harry Potter Boxed Set, Books 1-5 (Harry Potter, #1-5)",
"Mark of the Lion Trilogy",
"It's a Magical World: A Calvin and Hobbes Collection",
"Harry Potter Boxset (Harry Potter, #1-7)"
],
"xaxis": "x",
"y": [
4.8199999999999985,
4.77,
4.759999999999999,
4.75,
4.740000000000002
],
"yaxis": "y"
}
],
"layout": {
"barmode": "relative",
"height": 500,
"legend": {
"title": {
"text": "variable"
},
"tracegroupgap": 0
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Plot of Top 3 Highly Rated Books on Basis of Average Ratings"
},
"width": 700,
"xaxis": {
"anchor": "y",
"domain": [
0,
1
],
"title": {
"text": "title"
}
},
"yaxis": {
"anchor": "x",
"domain": [
0,
1
],
"title": {
"text": "value"
}
}
}
},
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the five titles with the highest mean average_rating.\n",
"# The title text matches the number of bars actually plotted (nlargest(5)).\n",
"px.bar(\n",
"    dfuse.groupby('title')['average_rating'].mean().sort_values(ascending=False).nlargest(5),\n",
"    title='Plot of Top 5 Highly Rated Books on Basis of Average Ratings',\n",
"    color_discrete_sequence=['red'] * 3,\n",
"    height=500,\n",
"    width=700,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As shown above, the highest average rating, 4.82, belongs to \"The Complete Calvin and Hobbes\", followed by 4.77 for the \"Harry Potter Boxed Set, Books 1-5\"."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q4 Which User Rated the Most Books?"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"alignmentgroup": "True",
"hovertemplate": "variable=rating<br>user_id=%{x}<br>value=%{y}<extra></extra>",
"legendgroup": "rating",
"marker": {
"color": "orange"
},
"name": "rating",
"offsetgroup": "rating",
"orientation": "v",
"showlegend": true,
"textposition": "auto",
"type": "bar",
"x": [
2276,
4147,
2501,
1794,
1350,
3261,
4212,
1225,
997,
2948
],
"xaxis": "x",
"y": [
185,
179,
172,
172,
171,
168,
168,
166,
166,
165
],
"yaxis": "y"
}
],
"layout": {
"barmode": "relative",
"height": 500,
"legend": {
"title": {
"text": "variable"
},
"tracegroupgap": 0
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Plot of User Id with Ratings Count"
},
"width": 700,
"xaxis": {
"anchor": "y",
"domain": [
0,
1
],
"title": {
"text": "user_id"
}
},
"yaxis": {
"anchor": "x",
"domain": [
0,
1
],
"title": {
"text": "value"
}
}
}
},
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the ten user_ids with the highest number of ratings.\n",
"px.bar(\n",
"    dfuse.groupby('user_id')['rating'].count().sort_values(ascending=False).nlargest(10),\n",
"    title='Plot of User Id with Ratings Count',\n",
"    color_discrete_sequence=['orange', 'orange', 'orange'],\n",
"    height=500,\n",
"    width=700,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As shown above, user id 2276 rated the most books (185), followed by user id 4147 with 179, then user ids 2501 and 1794 with 172 each."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q5 Which Original Title Has the Most Ratings?"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Count ratings per original_title and keep the six largest counts as a one-column frame.\n",
"df_1 = (\n",
"    dfuse.groupby('original_title')['rating']\n",
"    .count()\n",
"    .sort_values(ascending=False)\n",
"    .nlargest(6)\n",
"    .to_frame()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Blank placeholder column; a later cell fills it with each title's image URL.\n",
"df_1['Image'] = \" \"\n",
"\n",
"\n",
"def path_to_image_html(path):\n",
"    \"\"\"Wrap an image URL in an HTML <img> tag for use as a DataFrame.to_html formatter.\n",
"\n",
"    path: URL (string) of the image to embed.\n",
"    Returns the HTML snippet as a string.\n",
"    \"\"\"\n",
"    return '<img src=\"' + path + '\" width=\"60\" >'"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Fill the 'Image' column with the first image_url found for each title.\n",
"# Position 0 keeps its blank placeholder; the display cell also drops it via df_1[1:].\n",
"# NOTE(review): presumably that first row is a blank-title group -- confirm.\n",
"image_pos = df_1.columns.get_loc('Image')\n",
"for i in range(1, len(df_1.index)):\n",
"    url = dfuse[dfuse['original_title'] == df_1.index[i]]['image_url'].unique()\n",
"    # Single positional assignment: chained df_1['Image'][i] = ... writes through an\n",
"    # intermediate Series and uses an integer key on a string-labelled index.\n",
"    df_1.iloc[i, image_pos] = url[0]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" rating | \n",
" Image | \n",
"
\n",
" \n",
" | original_title | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | The Da Vinci Code | \n",
" 1010 | \n",
"  | \n",
"
\n",
" \n",
" | To Kill a Mockingbird | \n",
" 922 | \n",
"  | \n",
"
\n",
" \n",
" | Harry Potter and the Philosopher's Stone | \n",
" 922 | \n",
"  | \n",
"
\n",
" \n",
" | The Catcher in the Rye | \n",
" 883 | \n",
"  | \n",
"
\n",
" \n",
" | Harry Potter and the Prisoner of Azkaban | \n",
" 883 | \n",
"  | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Columns whose cell values should go through the image formatter.\n",
"image_cols = ['Image']\n",
"\n",
"# Create the dictionary to be passed as formatters: column name -> path_to_image_html.\n",
"format_dict = {image_col: path_to_image_html for image_col in image_cols}\n",
"\n",
"# Skip the first row and render unescaped HTML so the formatter output is shown as markup.\n",
"display(HTML(df_1.iloc[1:].to_html(escape=False, formatters=format_dict)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As shown in the dataframe above, \"The Da Vinci Code\" has the highest rating count, 1010; it is book id 26, as mentioned in Q2."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Recommendation System "
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# create pivot table to study the user and book relation\n",
"# Build a user_id x original_title table of ratings to study the user/book relation.\n",
"# Cells with no rating for that user/title pair are filled with 0.\n",
"df_pivot = dfuse.pivot_table(\n",
"    index='user_id',\n",
"    columns='original_title',\n",
"    values='rating',\n",
"    fill_value=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | original_title | \n",
" | \n",
" Animal Farm & 1984 | \n",
" Burned (Burned, #1) | \n",
" Carter Beats the Devil | \n",
" De Imitatione Christi | \n",
" Faust. Der Tragödie erster und zweiter Teil | \n",
" Feeling Good: The New Mood Therapy | \n",
" Innocent Traitor | \n",
" The Fellowship of the Ring | \n",
" The Sea of Trolls | \n",
" The Tale of Despereaux | \n",
" Игрок | \n",
" الرحيق المختوم: بحث في السيرة النبوية على صاحبها أفضل الصلاة والسلام | \n",
" سمفونی مردگان | \n",
" めくらやなぎと、眠る女 [Mekurayanagi to, nemuru onna] | \n",
" 国境の南、太陽の西 [Kokkyō no minami, taiyō no nishi] | \n",
" 絶対彼氏 (Zettai Kareshi) 1 | \n",
" \"...And Ladies of the Club\" | \n",
" \"D\" is for Deadbeat | \n",
" \"M\" is for Malice | \n",
" \"N\" is for Noose | \n",
" \"O\" is for Outlaw | \n",
" \"P\" is for Peril | \n",
" \"Q\" is for Quarry | \n",
" 'Tis: A Memoir | \n",
" ... Then He Ate My Boy Entracers | \n",
" ...and that's when it fell off in my hand | \n",
" 1,000 Places to See Before You Die | \n",
" 100 Bullets, Vol. 1: First Shot, Last Call (100 bullets, #1) | \n",
" 100 Selected Poems by e. e. Cummings | \n",
" 13 Little Blue Envelopes | \n",
" 1421: The Year China Discovered America | \n",
" 1491: New Revelations of the Americas Before Columbus | \n",
" 16 Lighthouse Road | \n",
" 1776 | \n",
" 1st To Die | \n",
" 2001: A Space Odyssey | \n",
" 2010: Odyssey Two | \n",
" 204 Rosewood Lane | \n",
" 2061: Odyssey Three | \n",
" 20th Century Ghosts | \n",
" 2666 | \n",
" 2nd Chance | \n",
" 30 Days of Night | \n",
" 300 | \n",
" 3001: The Final Odyssey | \n",
" 31 Songs | \n",
" 3rd Degree | \n",
" 4.50 From Paddington | \n",
" 44 Scotland Street | \n",
" ... | \n",
" デスノート #2 (Desu Nōto) Gōryū (合流) | \n",
" デスノート #3 (Desu Nōto) Gekisō (激走) | \n",
" デスノート #4 (Desu Nōto) Koigokoro (恋心) | \n",
" デスノート #5 (Desu Nōto) Hakushi (白紙,) | \n",
" デスノート #6 (Desu Nōto) Kōkan (交換) | \n",
" デスノート #7 (Desu Nōto) Zero (零) | \n",
" デスノート #8 (Desu Nōto) Mato (的) | \n",
" ノルウェイの森 [Noruwei no Mori] | \n",
" バトル・ロワイアル | \n",
" フルーツバスケット 1 | \n",
" フルーツバスケット 14 | \n",
" フルーツバスケット 15 | \n",
" フルーツバスケット 2 | \n",
" フルーツバスケット 3 | \n",
" フルーツバスケット 4 | \n",
" フルーツバスケット 7 | \n",
" マース (Mars) #1 | \n",
" ヤマトナデシコ七変化 | \n",
" ラブひな #1 | \n",
" ヴァンパイア騎士 1 | \n",
" ヴァンパイア騎士 2 | \n",
" ヴァンパイア騎士 3 | \n",
" ヴァンパイア騎士 4 | \n",
" 世界の終りとハードボイルド・ワンダーランド [Sekai no owari to hādoboirudo wandārando] | \n",
" 五輪書 [Go Rin no Sho] | \n",
" 午後の曳航 [Gogo no eikō] | \n",
" 名探偵コナン 1 | \n",
" 孫子兵法 [Sūnzi bīngfǎ] | \n",
" 宮本武蔵 Miyamoto Musashi | \n",
" 易 [Yì] | \n",
" 桜蘭高校ホスト部 1 | \n",
" 桜蘭高校ホスト部 2 | \n",
" 桜蘭高校ホスト部 3 | \n",
" 桜蘭高校ホスト部 4 | \n",
" 沈黙 | \n",
" 海辺のカフカ [Umibe no Kafuka] | \n",
" 満月をさがして 1 | \n",
" 犬夜叉 1 | \n",
" 砂の女 [Suna no onna] | \n",
" 神の子どもたちはみな踊る | \n",
" 窓ぎわのトットちゃん | \n",
" 紳士同盟† 1 | \n",
" 羊をめぐる冒険 [Hitsujiwomegurubōken] | \n",
" 美少女戦士セーラームーン 1 [Bishōjo Senshi Sailor Moon 1] | \n",
" 花より男子 1 | \n",
" 論語 [Lún Yǔ] | \n",
" 象の消滅 [Zō no shōmetsu] | \n",
" 道德經 [dào dé jīng] | \n",
" 鋼の錬金術師 1 | \n",
" 雪国 | \n",
"
\n",
" \n",
" | user_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 6 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 5286 columns
\n",
"
"
],
"text/plain": [
"original_title Animal Farm & 1984 Burned (Burned, #1) \\\n",
"user_id \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"6 0 0 0 \n",
"\n",
"original_title Carter Beats the Devil De Imitatione Christi \\\n",
"user_id \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"6 0 0 \n",
"\n",
"original_title Faust. Der Tragödie erster und zweiter Teil \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title Feeling Good: The New Mood Therapy Innocent Traitor \\\n",
"user_id \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"6 0 0 \n",
"\n",
"original_title The Fellowship of the Ring The Sea of Trolls \\\n",
"user_id \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"6 0 0 \n",
"\n",
"original_title The Tale of Despereaux Игрок \\\n",
"user_id \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"6 0 0 \n",
"\n",
"original_title الرحيق المختوم: بحث في السيرة النبوية على صاحبها أفضل الصلاة والسلام \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title سمفونی مردگان めくらやなぎと、眠る女 [Mekurayanagi to, nemuru onna] \\\n",
"user_id \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"6 0 0 \n",
"\n",
"original_title 国境の南、太陽の西 [Kokkyō no minami, taiyō no nishi] \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title 絶対彼氏 (Zettai Kareshi) 1 \"...And Ladies of the Club\" \\\n",
"user_id \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"6 0 0 \n",
"\n",
"original_title \"D\" is for Deadbeat \"M\" is for Malice \"N\" is for Noose \\\n",
"user_id \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"6 0 0 0 \n",
"\n",
"original_title \"O\" is for Outlaw \"P\" is for Peril \"Q\" is for Quarry \\\n",
"user_id \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"6 0 0 0 \n",
"\n",
"original_title 'Tis: A Memoir ... Then He Ate My Boy Entracers \\\n",
"user_id \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"6 0 0 \n",
"\n",
"original_title ...and that's when it fell off in my hand \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title 1,000 Places to See Before You Die \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title 100 Bullets, Vol. 1: First Shot, Last Call (100 bullets, #1) \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title 100 Selected Poems by e. e. Cummings \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title 13 Little Blue Envelopes \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title 1421: The Year China Discovered America \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title 1491: New Revelations of the Americas Before Columbus \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title 16 Lighthouse Road 1776 1st To Die 2001: A Space Odyssey \\\n",
"user_id \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"6 0 0 0 0 \n",
"\n",
"original_title 2010: Odyssey Two 204 Rosewood Lane 2061: Odyssey Three \\\n",
"user_id \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"6 0 0 0 \n",
"\n",
"original_title 20th Century Ghosts 2666 2nd Chance 30 Days of Night 300 \\\n",
"user_id \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"6 0 0 0 0 0 \n",
"\n",
"original_title 3001: The Final Odyssey 31 Songs 3rd Degree \\\n",
"user_id \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"6 0 0 0 \n",
"\n",
"original_title 4.50 From Paddington 44 Scotland Street ... \\\n",
"user_id ... \n",
"1 0 0 ... \n",
"2 0 0 ... \n",
"3 0 0 ... \n",
"4 0 0 ... \n",
"6 0 0 ... \n",
"\n",
"original_title デスノート #2 (Desu Nōto) Gōryū (合流) \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title デスノート #3 (Desu Nōto) Gekisō (激走) \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title デスノート #4 (Desu Nōto) Koigokoro (恋心) \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title デスノート #5 (Desu Nōto) Hakushi (白紙,) \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title デスノート #6 (Desu Nōto) Kōkan (交換) \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title デスノート #7 (Desu Nōto) Zero (零) デスノート #8 (Desu Nōto) Mato (的) \\\n",
"user_id \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"6 0 0 \n",
"\n",
"original_title ノルウェイの森 [Noruwei no Mori] バトル・ロワイアル フルーツバスケット 1 \\\n",
"user_id \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"6 0 0 0 \n",
"\n",
"original_title フルーツバスケット 14 フルーツバスケット 15 フルーツバスケット 2 フルーツバスケット 3 \\\n",
"user_id \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"6 0 0 0 0 \n",
"\n",
"original_title フルーツバスケット 4 フルーツバスケット 7 マース (Mars) #1 ヤマトナデシコ七変化 ラブひな #1 \\\n",
"user_id \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"6 0 0 0 0 0 \n",
"\n",
"original_title ヴァンパイア騎士 1 ヴァンパイア騎士 2 ヴァンパイア騎士 3 ヴァンパイア騎士 4 \\\n",
"user_id \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"6 0 0 0 0 \n",
"\n",
"original_title 世界の終りとハードボイルド・ワンダーランド [Sekai no owari to hādoboirudo wandārando] \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title 五輪書 [Go Rin no Sho] 午後の曳航 [Gogo no eikō] 名探偵コナン 1 \\\n",
"user_id \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"6 0 0 0 \n",
"\n",
"original_title 孫子兵法 [Sūnzi bīngfǎ] 宮本武蔵 Miyamoto Musashi 易 [Yì] \\\n",
"user_id \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"6 0 0 0 \n",
"\n",
"original_title 桜蘭高校ホスト部 1 桜蘭高校ホスト部 2 桜蘭高校ホスト部 3 桜蘭高校ホスト部 4 沈黙 \\\n",
"user_id \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"6 0 0 0 0 0 \n",
"\n",
"original_title 海辺のカフカ [Umibe no Kafuka] 満月をさがして 1 犬夜叉 1 \\\n",
"user_id \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"6 0 0 0 \n",
"\n",
"original_title 砂の女 [Suna no onna] 神の子どもたちはみな踊る 窓ぎわのトットちゃん 紳士同盟† 1 \\\n",
"user_id \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"6 0 0 0 0 \n",
"\n",
"original_title 羊をめぐる冒険 [Hitsujiwomegurubōken] \\\n",
"user_id \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"6 0 \n",
"\n",
"original_title 美少女戦士セーラームーン 1 [Bishōjo Senshi Sailor Moon 1] 花より男子 1 \\\n",
"user_id \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"6 0 0 \n",
"\n",
"original_title 論語 [Lún Yǔ] 象の消滅 [Zō no shōmetsu] 道德經 [dào dé jīng] \\\n",
"user_id \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"6 0 0 0 \n",
"\n",
"original_title 鋼の錬金術師 1 雪国 \n",
"user_id \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"6 0 0 \n",
"\n",
"[5 rows x 5286 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the user_id x original_title pivot\n",
"df_pivot.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(5286, 2780)"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Flip the pivot so that each row is a title and each column is a user\n",
"x = df_pivot.values.transpose()\n",
"x.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Dimensionality Reduction Using SVD"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(5286, 20)"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compress each title's row of user ratings down to 20 latent components;\n",
"# a fixed random_state keeps the decomposition reproducible between runs.\n",
"SVD = TruncatedSVD(random_state=17, n_components=20)\n",
"result_matrix = SVD.fit_transform(x)\n",
"result_matrix.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Computing the Pearson Correlation Matrix"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(5286, 5286)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Row-wise Pearson correlation between the reduced title vectors (one row/column per title)\n",
"corr_mat = np.corrcoef(result_matrix, rowvar=True)\n",
"corr_mat.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Recommendations for the Entered Book"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Find the column position of the query title (it is stored with a leading space)\n",
"book_names = df_pivot.columns\n",
"book_list = book_names.tolist()\n",
"book_index = book_list.index(' Innocent Traitor')"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(5286,)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Correlation of the chosen title against every title in the pivot\n",
"corr_book = corr_mat[book_index, :]\n",
"corr_book.shape"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Recommendation are\n"
]
},
{
"data": {
"text/plain": [
"['A Tree Grows In Brooklyn ',\n",
" 'Breathing Lessons',\n",
" 'Drowning Ruth',\n",
" \"French Women Don't Get Fat\",\n",
" 'Girl with a Pearl Earring',\n",
" 'September',\n",
" 'The Accidental Tourist',\n",
" \"The Bonesetter's Daughter\",\n",
" 'The Joy Luck Club',\n",
" 'The Last Days of Dogtown',\n",
" 'The Prince of Tides',\n",
" 'We Were the Mulvaneys',\n",
" 'Winter Solstice']"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"Recommendation are\")\n",
"# Keep titles correlated above 0.8 and remove the queried title by position;\n",
"# slicing with [1:] dropped whichever title happened to come first instead.\n",
"similar = corr_book > 0.8\n",
"similar[book_index] = False\n",
"ranked = np.flatnonzero(similar)\n",
"# List the strongest correlations first rather than in column order.\n",
"ranked = ranked[np.argsort(-corr_book[ranked], kind=\"stable\")]\n",
"list(book_names[ranked])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Recommendation Function: Returns the Top 5 Books"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"def path_to_image_html(path):\n",
" return '
'\n",
"\n",
"def df_recommend(recommend, top_n=5):\n",
"    \"\"\"Show the first ``top_n`` recommended titles with year and cover image.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    recommend : list of str\n",
"        Recommended titles, spelled as in ``dfuse['original_title']``.\n",
"    top_n : int, default 5\n",
"        Maximum number of titles to display.\n",
"\n",
"    Reads the notebook-level ``dfuse`` frame and renders an HTML table through\n",
"    ``display``; nothing is returned.\n",
"    \"\"\"\n",
"    recommend = list(recommend)[:top_n]\n",
"\n",
"    # One indexed lookup instead of scanning dfuse once per title. The first row\n",
"    # per title is kept, and a title missing from dfuse yields NaN rather than\n",
"    # shifting the later years/images onto the wrong rows. Object dtype keeps\n",
"    # integer years from being turned into floats when a NaN is introduced.\n",
"    details = (\n",
"        dfuse.drop_duplicates(subset='original_title')\n",
"        .set_index('original_title')[['original_publication_year', 'image_url']]\n",
"        .astype(object)\n",
"        .reindex(recommend)\n",
"    )\n",
"    recommend_df = pd.DataFrame({\n",
"        'Recommend Books': recommend,\n",
"        'Year of Publication': details['original_publication_year'].to_numpy(),\n",
"        'Image': details['image_url'].to_numpy(),\n",
"    })\n",
"\n",
"    def image_cell(url):\n",
"        # A title without a known image URL gets an empty cell.\n",
"        return path_to_image_html(url) if isinstance(url, str) else ''\n",
"\n",
"    # escape=False lets the <img> markup produced by the formatter render as HTML.\n",
"    display(HTML(recommend_df.to_html(escape=False, formatters={'Image': image_cell})))"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"def recommend_book(df_pivot, corr_mat, min_corr=0.8):\n",
"    \"\"\"Ask for a book title and display the titles most correlated with it.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df_pivot : pandas.DataFrame\n",
"        Pivot whose columns are book titles; expected to be in the same order\n",
"        as the rows/columns of ``corr_mat``.\n",
"    corr_mat : numpy.ndarray\n",
"        Square title-by-title correlation matrix.\n",
"    min_corr : float, default 0.8\n",
"        Only titles correlated more strongly than this are recommended.\n",
"\n",
"    The prompt repeats until a known title is entered. Matching titles are\n",
"    handed to ``df_recommend`` ordered from most to least correlated.\n",
"    \"\"\"\n",
"    book_names = df_pivot.columns\n",
"    book_list = list(book_names)\n",
"\n",
"    # Loop instead of recursing, and reject only unknown titles: errors raised\n",
"    # while building or displaying the table are no longer swallowed.\n",
"    while True:\n",
"        name = input(\"Enter the Name of the Book : \")\n",
"        # Some stored titles start with a space, so try that spelling as well.\n",
"        match = next((t for t in (name, \" \" + name) if t in book_list), None)\n",
"        if match is not None:\n",
"            break\n",
"        print(\"Enter the Book Name Again\")\n",
"\n",
"    book_index = book_list.index(match)\n",
"    corr_book = np.asarray(corr_mat[book_index])\n",
"\n",
"    # Remove the queried title by position; slicing with [1:] dropped whichever\n",
"    # title happened to come first instead.\n",
"    keep = corr_book > min_corr\n",
"    keep[book_index] = False\n",
"    ranked = np.flatnonzero(keep)\n",
"    # Strongest correlations first, so the rows df_recommend shows are the best ones.\n",
"    ranked = ranked[np.argsort(-corr_book[ranked], kind=\"stable\")]\n",
"\n",
"    print(\"Recommendation are\")\n",
"    df_recommend(list(book_names[ranked]))"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Enter the Name of the Book : The Prince of Tides\n",
"Recommendation are\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" Recommend Books | \n",
" Year of Publication | \n",
" Image | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A Tree Grows In Brooklyn | \n",
" 1943 | \n",
"  | \n",
"
\n",
" \n",
" | 1 | \n",
" All Over But the Shoutin' | \n",
" 1997 | \n",
"  | \n",
"
\n",
" \n",
" | 2 | \n",
" Angela's Ashes: A Memoir | \n",
" 1996 | \n",
"  | \n",
"
\n",
" \n",
" | 3 | \n",
" Back When We Were Grownups | \n",
" 2001 | \n",
"  | \n",
"
\n",
" \n",
" | 4 | \n",
" Beach Music | \n",
" 1975 | \n",
"  | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Interactive demo: prompts for a title, then renders its recommendations\n",
"recommend_book(df_pivot, corr_mat)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}