{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "**Table of Contents**\n", "\n", "
\n", "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Load modules and collect data\n", "- use `sqlite3` (database small enough to be held in memory)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import os\n", "from os.path import dirname, join\n", "\n", "import numpy as np\n", "import pandas.io.sql as psql\n", "import sqlite3 as sql\n", "\n", "\n", "from bokeh.sampledata.movies_data import movie_path\n", "# A notebook has no real __file__; point it at the Bokeh movies example so that\n", "# query.sql is looked up in the same directory. Set the MOVIES_MAIN_PY environment\n", "# variable to override the default checkout location.\n", "__file__ = os.environ.get(\n", "    'MOVIES_MAIN_PY',\n", "    os.path.expanduser('~/git_local/bokeh_original/examples/app/movies/main.py'))\n", "conn = sql.connect(movie_path)\n", "# Read the SQL text inside a context manager so the file handle is closed right away.\n", "with open(join(dirname(__file__), 'query.sql')) as query_file:\n", "    query = query_file.read()\n", "movies = psql.read_sql(query, conn)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT omdb.ID,\n", " imdbID,\n", " Title,\n", " Year,\n", " omdb.Rating as mpaaRating,\n", " Runtime,\n", " Genre,\n", " Released,\n", " Director,\n", " Writer,\n", " omdb.Cast,\n", " imdbRating,\n", " imdbVotes,\n", " Language,\n", " Country,\n", " Oscars,\n", " tomatoes.Rating as numericRating,\n", " Meter,\n", " Reviews,\n", " Fresh,\n", " Rotten,\n", " userMeter,\n", " userRating,\n", " userReviews,\n", " BoxOffice,\n", " Production\n", "FROM omdb, tomatoes\n", "WHERE omdb.ID = tomatoes.ID AND Reviews >= 10\n", "\n" ] } ], "source": [ "print(query)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDimdbIDTitleYearmpaaRatingRuntimeGenreReleasedDirectorWriterCastimdbRatingimdbVotesLanguageCountryOscarsnumericRatingMeterReviewsFreshRottenuserMeteruserRatinguserReviewsBoxOfficeProduction
04972tt0004972The Birth of a Nation1915Not Rated165.0Drama, History, Romance1915-03-03D.W. GriffithThomas F. Dixon Jr. (adapted from his novel: \"...Lillian Gish, Mae Marsh, Henry B. Walthall, Mi...6.913793.0NoneUSA08.01003838058.03.34034.0NaNGravitas
16206tt0006206Les vampires1915Not Rated399.0Action, Adventure, Crime1915-11-13Louis FeuilladeLouis FeuilladeMusidora, Édouard Mathé, Marcel Lévesque, Jean...6.62502.0FrenchFrance08.81001313085.03.82075.0NaNNone
26864tt0006864Intolerance: Love's Struggle Throughout the Ages1916Not Rated197.0Drama, History1916-09-05D.W. GriffithD.W. Griffith (scenario), Anita Loos (titles)Lillian Gish, Spottiswoode Aitken, Mary Alden,...8.08673.0NoneUSA08.0962827178.03.84604.0NaNCohen Media Group
\n", "
" ], "text/plain": [ " ID imdbID Title Year \\\n", "0 4972 tt0004972 The Birth of a Nation 1915 \n", "1 6206 tt0006206 Les vampires 1915 \n", "2 6864 tt0006864 Intolerance: Love's Struggle Throughout the Ages 1916 \n", "\n", " mpaaRating Runtime Genre Released Director \\\n", "0 Not Rated 165.0 Drama, History, Romance 1915-03-03 D.W. Griffith \n", "1 Not Rated 399.0 Action, Adventure, Crime 1915-11-13 Louis Feuillade \n", "2 Not Rated 197.0 Drama, History 1916-09-05 D.W. Griffith \n", "\n", " Writer \\\n", "0 Thomas F. Dixon Jr. (adapted from his novel: \"... \n", "1 Louis Feuillade \n", "2 D.W. Griffith (scenario), Anita Loos (titles) \n", "\n", " Cast imdbRating imdbVotes \\\n", "0 Lillian Gish, Mae Marsh, Henry B. Walthall, Mi... 6.9 13793.0 \n", "1 Musidora, Édouard Mathé, Marcel Lévesque, Jean... 6.6 2502.0 \n", "2 Lillian Gish, Spottiswoode Aitken, Mary Alden,... 8.0 8673.0 \n", "\n", " Language Country Oscars numericRating Meter Reviews Fresh Rotten \\\n", "0 None USA 0 8.0 100 38 38 0 \n", "1 French France 0 8.8 100 13 13 0 \n", "2 None USA 0 8.0 96 28 27 1 \n", "\n", " userMeter userRating userReviews BoxOffice Production \n", "0 58.0 3.3 4034.0 NaN Gravitas \n", "1 85.0 3.8 2075.0 NaN None \n", "2 78.0 3.8 4604.0 NaN Cohen Media Group " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head(n=3)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDYearRuntimeimdbRatingimdbVotesOscarsnumericRatingMeterReviewsFreshRottenuserMeteruserRatinguserReviewsBoxOffice
count1.2569e+0412569.000012569.000012549.00001.2548e+0412569.000012569.000012569.000012569.000012569.000012569.000012478.000012358.00001.2557e+044.1890e+03
mean5.8837e+051996.3287102.74646.61022.7486e+040.10716.080862.646752.454532.420020.034564.25933.40251.8156e+053.0246e+07
std6.5278e+0517.690923.52991.01076.5074e+040.60501.449727.598352.680938.394827.102119.86520.46141.9692e+065.7222e+07
min4.9720e+031902.00000.00001.10005.0000e+000.00001.10000.000010.00000.00000.00000.00000.50002.0000e+003.8200e+02
25%1.0386e+051991.000091.00006.00001.5518e+030.00005.100041.000016.00009.00004.000050.00003.10001.4560e+032.0000e+05
50%3.0021e+052002.0000100.00006.70005.6120e+030.00006.200069.000030.000018.00009.000067.00003.40006.7600e+035.1000e+06
75%9.9717e+052008.0000112.00007.30002.2700e+040.00007.200086.000068.000040.000023.000081.00003.70004.1481e+043.6000e+07
max3.4041e+062014.0000566.00009.30001.1875e+0611.000010.0000100.0000304.0000292.0000192.000099.00005.00003.5778e+077.6050e+08
\n", "
" ], "text/plain": [ " ID Year Runtime imdbRating imdbVotes Oscars \\\n", "count 1.2569e+04 12569.0000 12569.0000 12549.0000 1.2548e+04 12569.0000 \n", "mean 5.8837e+05 1996.3287 102.7464 6.6102 2.7486e+04 0.1071 \n", "std 6.5278e+05 17.6909 23.5299 1.0107 6.5074e+04 0.6050 \n", "min 4.9720e+03 1902.0000 0.0000 1.1000 5.0000e+00 0.0000 \n", "25% 1.0386e+05 1991.0000 91.0000 6.0000 1.5518e+03 0.0000 \n", "50% 3.0021e+05 2002.0000 100.0000 6.7000 5.6120e+03 0.0000 \n", "75% 9.9717e+05 2008.0000 112.0000 7.3000 2.2700e+04 0.0000 \n", "max 3.4041e+06 2014.0000 566.0000 9.3000 1.1875e+06 11.0000 \n", "\n", " numericRating Meter Reviews Fresh Rotten \\\n", "count 12569.0000 12569.0000 12569.0000 12569.0000 12569.0000 \n", "mean 6.0808 62.6467 52.4545 32.4200 20.0345 \n", "std 1.4497 27.5983 52.6809 38.3948 27.1021 \n", "min 1.1000 0.0000 10.0000 0.0000 0.0000 \n", "25% 5.1000 41.0000 16.0000 9.0000 4.0000 \n", "50% 6.2000 69.0000 30.0000 18.0000 9.0000 \n", "75% 7.2000 86.0000 68.0000 40.0000 23.0000 \n", "max 10.0000 100.0000 304.0000 292.0000 192.0000 \n", "\n", " userMeter userRating userReviews BoxOffice \n", "count 12478.0000 12358.0000 1.2557e+04 4.1890e+03 \n", "mean 64.2593 3.4025 1.8156e+05 3.0246e+07 \n", "std 19.8652 0.4614 1.9692e+06 5.7222e+07 \n", "min 0.0000 0.5000 2.0000e+00 3.8200e+02 \n", "25% 50.0000 3.1000 1.4560e+03 2.0000e+05 \n", "50% 67.0000 3.4000 6.7600e+03 5.1000e+06 \n", "75% 81.0000 3.7000 4.1481e+04 3.6000e+07 \n", "max 99.0000 5.0000 3.5778e+07 7.6050e+08 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.describe()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(12569, 26)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] 
}, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conn" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Styling columns: rows with at least one Oscar get an opaque orange marker,\n", "# all other rows get a faint grey one.\n", "movies[\"color\"] = np.where(movies[\"Oscars\"].gt(0), \"orange\", \"grey\")\n", "movies[\"alpha\"] = np.where(movies[\"Oscars\"].gt(0), 0.9, 0.25)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index([u'ID', u'imdbID', u'Title', u'Year', u'mpaaRating', u'Runtime',\n", " u'Genre', u'Released', u'Director', u'Writer', u'Cast', u'imdbRating',\n", " u'imdbVotes', u'Language', u'Country', u'Oscars', u'numericRating',\n", " u'Meter', u'Reviews', u'Fresh', u'Rotten', u'userMeter', u'userRating',\n", " u'userReviews', u'BoxOffice', u'Production', u'color', u'alpha'],\n", " dtype='object')" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.columns" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249...12319123201232112322123231232412325123261232712328123291233012331123321233312334123351233612337123381233912340123411234212343123441234512346123471234812349123501235112352123531235412355123561235712358123591236012361123621236312364123651236612367123681236912370123711237212373123741237512376123771237812379123801238112382123831238412385123861238712388123891239012391123921239312394123951239612397123981239912400124011240212403124041240512406124071240812409124101241112412124131241412415124161241712418124191242012421124221242312424124251242612427124281242912430124311243212433124341243512436124371243812439124401244112442124431244412445124461244712448124491245012451124521245312454124551245612457124581245912460124611246212463124641246512466124671246812469124701247112472124731247412475124761247712478124791248012481124821248312484124851248612487124881248912490124911249212493124941249512496124971249812499125001250112502125031250412505125061250712508125091251012511125121251312514125151251612517125181251912520125211252212523125241252512526125271252812529125301253112532125331253412535125361253712538125391254012541125421254312544125451254612547125481254912550125511255212553125541255512556125571255812559125601256112562125631256412565125661256712568
colorgreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreyorangegreyorangegreygreygreygreygreyorangegreygreygreygreygreyorangegreygreygreygreygreyorangegreygreygreygreygreygreygreygreygreygreygreyorangegreygreygreygreygreygreygreygreyorangegreygreyorangegreyorangegreygreygreygreygreygreyorangegreygreygreygreygreygreygreyorangegreygreygreygreygreygreygreygreygreyorangeorangegreygreygreygreygreygreyorangeorangegreygreygreygreygreygreygreygreygreygreygreygreygreygreyorangegreyorangegreygreygreygreygreygreygreyorangegreyorangegreygreyorangegreygreyorangegreygreyorangegreygreyorangegreyorangegreygreyorangegreygreyorangeorangegreygreygreygreygreyorangegreygreyorangegreygreyorangegreygreygreygreyorangegreyorangegreygreyorangegreygreygreygreygreyorangeorangegreygreygreygreygreyorangegreygreygreygreygreygreygreygreyorangeorangegreyorangegreygreygreygreygreygreyorangegreygreygreygreygreyorangeorangeorangegreygreygreyorangegreyorangeorangegreyorangeorangegreyorangegreygreygreygreygreygreygrey...greygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreyorangegreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreyorangegreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreyorangegreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygrey
greygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygreygrey
alpha0.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.90.250.90.250.250.250.250.250.90.250.250.250.250.250.90.250.250.250.250.250.90.250.250.250.250.250.250.250.250.250.250.250.90.250.250.250.250.250.250.250.250.90.250.250.90.250.90.250.250.250.250.250.250.90.250.250.250.250.250.250.250.90.250.250.250.250.250.250.250.250.250.90.90.250.250.250.250.250.250.90.90.250.250.250.250.250.250.250.250.250.250.250.250.250.250.90.250.90.250.250.250.250.250.250.250.90.250.90.250.250.90.250.250.90.250.250.90.250.250.90.250.90.250.250.90.250.250.90.90.250.250.250.250.250.90.250.250.90.250.250.90.250.250.250.250.90.250.90.250.250.90.250.250.250.250.250.90.90.250.250.250.250.250.90.250.250.250.250.250.250.250.250.90.90.250.90.250.250.250.250.250.250.90.250.250.250.250.250.90.90.90.250.250.250.90.250.90.90.250.90.90.250.90.250.250.250.250.250.250.25...0.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.90.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.90.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.90.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.250.25
\n", "

2 rows × 12569 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 10 11 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12 13 14 15 16 17 18 19 20 21 22 23 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 24 25 26 27 28 29 30 31 32 33 34 35 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 36 37 38 39 40 41 42 43 44 45 46 \\\n", "color grey grey grey grey orange grey orange grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.9 0.25 0.9 0.25 0.25 0.25 0.25 \n", "\n", " 47 48 49 50 51 52 53 54 55 56 57 \\\n", "color grey orange grey grey grey grey grey orange grey grey grey \n", "alpha 0.25 0.9 0.25 0.25 0.25 0.25 0.25 0.9 0.25 0.25 0.25 \n", "\n", " 58 59 60 61 62 63 64 65 66 67 68 \\\n", "color grey grey orange grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.9 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 69 70 71 72 73 74 75 76 77 78 79 \\\n", "color grey grey grey orange grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.9 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 80 81 82 83 84 85 86 87 88 89 90 \\\n", "color grey orange grey grey orange grey orange grey grey grey grey \n", "alpha 0.25 0.9 0.25 0.25 0.9 0.25 0.9 0.25 0.25 0.25 0.25 \n", "\n", " 91 92 93 94 95 96 97 98 99 100 101 \\\n", "color grey grey orange grey grey grey grey grey grey grey orange \n", "alpha 0.25 0.25 0.9 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.9 \n", "\n", " 102 103 104 105 106 107 108 109 110 111 112 \\\n", "color grey grey grey grey grey grey grey grey grey orange orange \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.9 0.9 \n", "\n", " 113 114 115 116 117 118 119 120 121 122 123 \\\n", "color grey grey grey grey grey grey orange orange 
grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.9 0.9 0.25 0.25 0.25 \n", "\n", " 124 125 126 127 128 129 130 131 132 133 134 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 135 136 137 138 139 140 141 142 143 144 145 \\\n", "color orange grey orange grey grey grey grey grey grey grey orange \n", "alpha 0.9 0.25 0.9 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.9 \n", "\n", " 146 147 148 149 150 151 152 153 154 155 \\\n", "color grey orange grey grey orange grey grey orange grey grey \n", "alpha 0.25 0.9 0.25 0.25 0.9 0.25 0.25 0.9 0.25 0.25 \n", "\n", " 156 157 158 159 160 161 162 163 164 165 \\\n", "color orange grey grey orange grey orange grey grey orange grey \n", "alpha 0.9 0.25 0.25 0.9 0.25 0.9 0.25 0.25 0.9 0.25 \n", "\n", " 166 167 168 169 170 171 172 173 174 175 176 \\\n", "color grey orange orange grey grey grey grey grey orange grey grey \n", "alpha 0.25 0.9 0.9 0.25 0.25 0.25 0.25 0.25 0.9 0.25 0.25 \n", "\n", " 177 178 179 180 181 182 183 184 185 186 \\\n", "color orange grey grey orange grey grey grey grey orange grey \n", "alpha 0.9 0.25 0.25 0.9 0.25 0.25 0.25 0.25 0.9 0.25 \n", "\n", " 187 188 189 190 191 192 193 194 195 196 \\\n", "color orange grey grey orange grey grey grey grey grey orange \n", "alpha 0.9 0.25 0.25 0.9 0.25 0.25 0.25 0.25 0.25 0.9 \n", "\n", " 197 198 199 200 201 202 203 204 205 206 207 \\\n", "color orange grey grey grey grey grey orange grey grey grey grey \n", "alpha 0.9 0.25 0.25 0.25 0.25 0.25 0.9 0.25 0.25 0.25 0.25 \n", "\n", " 208 209 210 211 212 213 214 215 216 217 218 \\\n", "color grey grey grey grey orange orange grey orange grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.9 0.9 0.25 0.9 0.25 0.25 0.25 \n", "\n", " 219 220 221 222 223 224 225 226 227 228 229 \\\n", "color grey grey grey orange grey grey grey grey grey orange orange \n", "alpha 0.25 0.25 0.25 0.9 0.25 0.25 0.25 0.25 0.25 0.9 0.9 \n", "\n", " 230 
231 232 233 234 235 236 237 238 239 \\\n", "color orange grey grey grey orange grey orange orange grey orange \n", "alpha 0.9 0.25 0.25 0.25 0.9 0.25 0.9 0.9 0.25 0.9 \n", "\n", " 240 241 242 243 244 245 246 247 248 249 ... \\\n", "color orange grey orange grey grey grey grey grey grey grey ... \n", "alpha 0.9 0.25 0.9 0.25 0.25 0.25 0.25 0.25 0.25 0.25 ... \n", "\n", " 12319 12320 12321 12322 12323 12324 12325 12326 12327 12328 12329 12330 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12331 12332 12333 12334 12335 12336 12337 12338 12339 12340 12341 12342 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12343 12344 12345 12346 12347 12348 12349 12350 12351 12352 12353 12354 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12355 12356 12357 12358 12359 12360 12361 12362 12363 12364 12365 \\\n", "color grey grey grey grey grey grey grey orange grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.9 0.25 0.25 0.25 \n", "\n", " 12366 12367 12368 12369 12370 12371 12372 12373 12374 12375 12376 12377 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12378 12379 12380 12381 12382 12383 12384 12385 12386 12387 12388 12389 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12390 12391 12392 12393 12394 12395 12396 12397 12398 12399 12400 \\\n", "color grey grey orange grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.9 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12401 12402 12403 12404 12405 12406 
12407 12408 12409 12410 12411 12412 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12413 12414 12415 12416 12417 12418 12419 12420 12421 12422 12423 12424 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12425 12426 12427 12428 12429 12430 12431 12432 12433 12434 12435 12436 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12437 12438 12439 12440 12441 12442 12443 12444 12445 12446 12447 \\\n", "color grey grey grey grey orange grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.9 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12448 12449 12450 12451 12452 12453 12454 12455 12456 12457 12458 12459 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12460 12461 12462 12463 12464 12465 12466 12467 12468 12469 12470 12471 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12472 12473 12474 12475 12476 12477 12478 12479 12480 12481 12482 12483 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12484 12485 12486 12487 12488 12489 12490 12491 12492 12493 12494 12495 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12496 12497 12498 12499 12500 12501 12502 12503 12504 12505 12506 12507 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 
0.25 0.25 0.25 \n", "\n", " 12508 12509 12510 12511 12512 12513 12514 12515 12516 12517 12518 12519 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12520 12521 12522 12523 12524 12525 12526 12527 12528 12529 12530 12531 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12532 12533 12534 12535 12536 12537 12538 12539 12540 12541 12542 12543 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12544 12545 12546 12547 12548 12549 12550 12551 12552 12553 12554 12555 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12556 12557 12558 12559 12560 12561 12562 12563 12564 12565 12566 12567 \\\n", "color grey grey grey grey grey grey grey grey grey grey grey grey \n", "alpha 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 \n", "\n", " 12568 \n", "color grey \n", "alpha 0.25 \n", "\n", "[2 rows x 12569 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies[['color','alpha']].T" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# handle null values" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "ID 0\n", "imdbID 0\n", "Title 0\n", "Year 0\n", "mpaaRating 2216\n", "Runtime 0\n", "Genre 21\n", "Released 188\n", "Director 20\n", "Writer 717\n", "Cast 157\n", "imdbRating 20\n", "imdbVotes 21\n", "Language 229\n", "Country 177\n", "Oscars 0\n", "numericRating 0\n", "Meter 0\n", "Reviews 0\n", "Fresh 0\n", "Rotten 0\n", "userMeter 91\n", "userRating 211\n", "userReviews 12\n", "BoxOffice 8380\n", 
# Count the missing values in every column before any cleaning.
movies.isnull().sum()


def _format_revenue(amount):
    """Render a box-office amount as a whole number with thousands separators."""
    return format(int(amount), ",d")


# Just replace missing values with zero (the frame is modified in place),
# then re-count to confirm that no nulls are left.
movies.fillna(value=0, inplace=True)
movies.isnull().sum()

# Human-readable box-office column, e.g. 51000.0 -> "51,000".
movies["revenue"] = movies["BoxOffice"].apply(_format_revenue)
# Display the formatted revenue column.
movies['revenue']

# Read the Razzie list; every line of the file is later matched against
# movies.imdbID.
with open(join(dirname(__file__), "razzies-clean.csv")) as f:
    razzies = f.read().splitlines()

# Highlight the listed movies. The membership mask is computed once and
# reused for both column assignments.
is_razzie = movies.imdbID.isin(razzies)
movies.loc[is_razzie, "color"] = "purple"
movies.loc[is_razzie, "alpha"] = 0.9

# Maps the axis labels offered in the UI to the DataFrame column they plot.
axis_map = {
    "Tomato Meter": "Meter",
    "Numeric Rating": "numericRating",
    "Number of Reviews": "Reviews",
    "Box Office (dollars)": "BoxOffice",
    "Length (minutes)": "Runtime",
    "Year": "Year",
}

# now let's work with Bokeh
from bokeh.plotting import figure
from bokeh.layouts import layout, widgetbox
from bokeh.models import ColumnDataSource, HoverTool, Div
from bokeh.models.widgets import Slider, Select, TextInput
from bokeh.io import curdoc

# Read the description HTML inside a `with` block so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open(join(dirname(__file__), "description.html")) as description_file:
    desc = Div(text=description_file.read(), width=800)
# Create Input Controls
reviews = Slider(title="Minimum number of reviews", value=80, start=10, end=300, step=10)
min_year = Slider(title="Year released", start=1940, end=2014, value=1970, step=1)
max_year = Slider(title="End Year released", start=1940, end=2014, value=2014, step=1)
oscars = Slider(title="Minimum number of Oscar wins", start=0, end=4, value=0, step=1)
boxoffice = Slider(title="Dollars at Box Office (millions)", start=0, end=800, value=0, step=1)
# Load the genre choices with a context manager so the file handle is closed.
with open(join(dirname(__file__), 'genres.txt')) as genres_file:
    genre_options = genres_file.read().split()
genre = Select(title="Genre", value="All", options=genre_options)
director = TextInput(title="Director name contains")
cast = TextInput(title="Cast names contains")
x_axis = Select(title="X Axis", options=sorted(axis_map.keys()), value="Tomato Meter")
y_axis = Select(title="Y Axis", options=sorted(axis_map.keys()), value="Number of Reviews")

# Create Column Data Source that will be used by the plot
source = ColumnDataSource(data=dict(x=[], y=[], color=[], title=[], year=[], revenue=[], alpha=[]))

hover = HoverTool(tooltips=[
    ("Title", "@title"),
    ("Year", "@year"),
    ("$", "@revenue")
])

p = figure(plot_height=600, plot_width=700, title="", toolbar_location=None, tools=[hover])
p.circle(x="x", y="y", source=source, size=7, color="color", line_color=None, fill_alpha="alpha")


def _contains_text(column, text):
    """Return a boolean mask of the rows of `column` that contain `text`.

    The match is a literal, case-sensitive substring test: `regex=False`
    keeps characters such as "(" or "*" typed into a text box from being
    compiled as a regular expression (which would raise on invalid
    patterns). `na=False` makes missing or non-string entries count as
    "no match", so the result is a plain boolean mask.
    """
    return column.str.contains(text, regex=False, na=False)


def select_movies():
    """Return the rows of `movies` that satisfy the current widget values.

    Numeric filters (minimum reviews, minimum box office given in millions
    of dollars, year range, minimum Oscar wins) are always applied. The
    genre filter is skipped when "All" is selected, and the director / cast
    filters are skipped when their text box is empty after stripping
    whitespace.
    """
    genre_val = genre.value
    director_val = director.value.strip()
    cast_val = cast.value.strip()
    selected = movies[
        (movies.Reviews >= reviews.value) &
        (movies.BoxOffice >= (boxoffice.value * 1e6)) &  # slider is in millions
        (movies.Year >= min_year.value) &
        (movies.Year <= max_year.value) &
        (movies.Oscars >= oscars.value)
    ]
    if genre_val != "All":
        selected = selected[_contains_text(selected.Genre, genre_val)]
    if director_val != "":
        selected = selected[_contains_text(selected.Director, director_val)]
    if cast_val != "":
        selected = selected[_contains_text(selected.Cast, cast_val)]
    return selected


def update():
    """Re-filter the movies and push the result to the plot.

    Sets the axis labels from the two axis selectors, writes the number of
    selected movies into the plot title, and replaces the data of `source`
    with the columns the glyph and the hover tool read.
    """
    df = select_movies()
    x_name = axis_map[x_axis.value]
    y_name = axis_map[y_axis.value]

    p.xaxis.axis_label = x_axis.value
    p.yaxis.axis_label = y_axis.value
    p.title.text = "%d movies selected" % len(df)
    source.data = dict(
        x=df[x_name],
        y=df[y_name],
        color=df["color"],
        title=df["Title"],
        year=df["Year"],
        revenue=df["revenue"],
        alpha=df["alpha"],
    )


# Any change to a control's value triggers a full refresh of the plot.
controls = [reviews, boxoffice, genre, min_year, max_year, oscars, director, cast, x_axis, y_axis]
for control in controls:
    control.on_change('value', lambda attr, old, new: update())

sizing_mode = 'fixed'  # 'scale_width' also looks nice with this example

inputs = widgetbox(*controls, sizing_mode=sizing_mode)
l = layout([
    [desc],
    [inputs, p],
], sizing_mode=sizing_mode)

update()  # initial load of the data

curdoc().add_root(l)
curdoc().title = "Movies"