{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "65c6136d", "metadata": {}, "outputs": [], "source": [ "#main\n", "import pandas as pd\n", "import numpy as np\n", "import math\n", "import sklearn\n", "import re\n", "\n", "#graphic\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns \n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "id": "68553d29", "metadata": {}, "outputs": [], "source": [
  "# Lift pandas' display limits so frames are shown with every row and column.\n",
  "pd.set_option('display.max_rows', None)\n",
  "pd.set_option('display.max_columns', None)"
] }, { "cell_type": "code", "execution_count": 3, "id": "054011ff", "metadata": {}, "outputs": [], "source": [
  "# The three 2022 files are read with their 'Unnamed: 0' column as the index.\n",
  "movie_overview_2022, movie_detail_2022, movie_award_2022 = (\n",
  "    pd.read_csv(csv_path, index_col='Unnamed: 0')\n",
  "    for csv_path in ('./movies_2022.csv', './movie_details_2022.csv', './awards_2022.csv')\n",
  ")\n",
  "top_1000_movies = pd.read_csv('./imdb_top_1000.csv')\n"
] }, { "cell_type": "code", "execution_count": 4, "id": "f43f1bf2", "metadata": {}, "outputs": [], "source": [
  "# Discard the index read from the CSV and renumber the overview rows from 0.\n",
  "movie_overview_2022 = movie_overview_2022.reset_index(drop=True)"
] }, { "cell_type": "code", "execution_count": 5, "id": "ef9573a7", "metadata": {}, "outputs": [], "source": [
  "# These columns would be duplicated once the frames are concatenated side by side.\n",
  "movie_detail_2022 = movie_detail_2022.drop(columns=['title', 'movie_id', 'movie_imdb_link'])"
] }, { "cell_type": "code", "execution_count": 6, "id": "60002f92", "metadata": {}, "outputs": [], "source": [
  "# Same clean-up for the awards frame.\n",
  "movie_award_2022 = movie_award_2022.drop(columns=['title', 'movie_id'])"
] }, { "cell_type": "code", "execution_count": 7, "id": "f70eca32", "metadata": {}, "outputs": [], "source": [
  "# Side-by-side concat; rows are matched on the index labels of the three frames.\n",
  "movies_df = pd.concat(\n",
  "    [movie_overview_2022, movie_award_2022, movie_detail_2022],\n",
  "    axis='columns',\n",
  ")\n"
] }, { "cell_type": "code", "execution_count": 8, "id": "086eb0c9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(477, 26)" ] }, "execution_count": 8, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "movies_df.shape" ] }, { "cell_type": "code", "execution_count": 9, "id": "732abe68", "metadata": {}, "outputs": [], "source": [
  "def separete_awards(df):\n",
  "    \"\"\"Split the free-text `awards_total` column into win and nomination counts.\n",
  "\n",
  "    Adds `total_award_nominations` and `total_award_wins` to `df` as digit strings\n",
  "    ('0' when the text mentions no award of that kind) and returns the same frame.\n",
  "    \"\"\"\n",
  "    awards_text = df['awards_total'].astype(str)\n",
  "    # Pick each count by its label rather than by position, so text that lists only\n",
  "    # wins or only nominations no longer raises IndexError or fills the wrong column.\n",
  "    nominations = awards_text.str.extract(r'(\\d+)\\s+nominations?\\b', expand=False)\n",
  "    wins = awards_text.str.extract(r'(\\d+)\\s+wins?\\b', expand=False)\n",
  "    # Whole-column assignment replaces the chained df[col][i] writes, which raise\n",
  "    # SettingWithCopyWarning and are silently dropped under pandas copy-on-write.\n",
  "    df['total_award_nominations'] = nominations.fillna('0')\n",
  "    df['total_award_wins'] = wins.fillna('0')\n",
  "    return df"
] }, { "cell_type": "code", "execution_count": 10, "id": "0c400730", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/33/p_3l01b14g96rn22vzwly2g00000gn/T/ipykernel_64253/2812189874.py:9: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['total_award_wins'][i] = numbers[0]\n", "/var/folders/33/p_3l01b14g96rn22vzwly2g00000gn/T/ipykernel_64253/2812189874.py:10: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['total_award_nominations'][i] = numbers[1]\n", "/var/folders/33/p_3l01b14g96rn22vzwly2g00000gn/T/ipykernel_64253/2812189874.py:13: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['total_award_wins'][i] = 
df['awards_total'][i]\n", "/var/folders/33/p_3l01b14g96rn22vzwly2g00000gn/T/ipykernel_64253/2812189874.py:14: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['total_award_nominations'][i] = df['awards_total'][i]\n" ] } ], "source": [ "movies_df_v1 = separete_awards(movies_df)" ] }, { "cell_type": "code", "execution_count": 11, "id": "4391f073", "metadata": {}, "outputs": [], "source": [
  "def separete_genre(df):\n",
  "    \"\"\"Derive `primary_genre` and `secondary_genre` from the comma-separated `genre` column.\n",
  "\n",
  "    The first listed genre becomes the primary one and the second the secondary one.\n",
  "    A title with a single genre gets 'Other' as secondary; the '[]' placeholder\n",
  "    (no genre) leaves both columns as ''. Returns the same frame.\n",
  "    \"\"\"\n",
  "    def _first_two(genre):\n",
  "        # Split on commas, not on word characters, so hyphenated genres such as\n",
  "        # 'Sci-Fi' or 'Reality-TV' stay whole instead of being cut in two.\n",
  "        if not isinstance(genre, str) or genre == '[]':\n",
  "            return '', ''\n",
  "        names = [name.strip() for name in genre.split(',') if name.strip()]\n",
  "        if not names:\n",
  "            return '', ''\n",
  "        return names[0], (names[1] if len(names) > 1 else 'Other')\n",
  "\n",
  "    genre_pairs = [_first_two(genre) for genre in df['genre']]\n",
  "    # Whole-column assignment replaces the chained df[col][i] writes that raised\n",
  "    # SettingWithCopyWarning.\n",
  "    df['primary_genre'] = [primary for primary, _ in genre_pairs]\n",
  "    df['secondary_genre'] = [secondary for _, secondary in genre_pairs]\n",
  "    return df"
] }, { "cell_type": "code", "execution_count": 12, "id": "9f7b7559", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/33/p_3l01b14g96rn22vzwly2g00000gn/T/ipykernel_64253/4184901931.py:12: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['primary_genre'][i] = g[0]\n", "/var/folders/33/p_3l01b14g96rn22vzwly2g00000gn/T/ipykernel_64253/4184901931.py:13: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: 
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['secondary_genre'][i] = g[1]\n", "/var/folders/33/p_3l01b14g96rn22vzwly2g00000gn/T/ipykernel_64253/4184901931.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['primary_genre'][i] = df['genre'][i]\n", "/var/folders/33/p_3l01b14g96rn22vzwly2g00000gn/T/ipykernel_64253/4184901931.py:16: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['secondary_genre'][i] = 'Other'\n" ] } ], "source": [ "movies_df_v2 = separete_genre(movies_df_v1)" ] }, { "cell_type": "code", "execution_count": 13, "id": "7c71fbf9", "metadata": {}, "outputs": [], "source": [ "movies_df_v2['certificate']= movies_df_v2['certificate'].replace({\n", " 'R':'Adult',\n", " 'PG-13':'+13/14', \n", " 'TV-13':'+13/14',\n", " 'TV-MA':'Adult',\n", " 'PG': 'Parental Guidance',\n", " 'TV-14': '+13/14',\n", " 'Unrated': 'Not Rated',\n", " 'Approved': 'Not Rated',\n", " 'TV-PG': 'Parental Guidance',\n", " 'TV-G': 'Kids',\n", " 'G': 'Suitable for all',\n", " 'TV-Y7': 'Kids',\n", " '18': 'Adult',\n", " 'TV-Y': 'Kids',\n", " 'TV-Y7-FV': 'Kids',\n", " 'T':'Adult',\n", " 'M': 'Adult'\n", "})" ] }, { "cell_type": "code", "execution_count": 14, "id": "ede6be51", "metadata": {}, "outputs": [], "source": [
  "def drop_k(df):\n",
  "    \"\"\"Turn the text in `num_user_reviews` into numbers and return the same frame.\n",
  "\n",
  "    A value containing 'K' is read as thousands ('1.2K' -> 1200.0), the '[]'\n",
  "    placeholder becomes 0, and anything that cannot be parsed becomes NaN.\n",
  "    \"\"\"\n",
  "    reviews = df['num_user_reviews'].astype(str).str.strip()\n",
  "    in_thousands = reviews.str.contains('K', regex=False)\n",
  "    counts = pd.to_numeric(reviews.str.replace('K', '', regex=False), errors='coerce')\n",
  "    counts = counts.where(~in_thousands, counts * 1000)\n",
  "    counts = counts.where(reviews != '[]', 0)\n",
  "    # One vectorised column assignment instead of chained df[col][i] writes, which\n",
  "    # raise SettingWithCopyWarning and are silently dropped under pandas copy-on-write.\n",
  "    df['num_user_reviews'] = counts\n",
  "    return df"
] }, { "cell_type": "code", "execution_count": 15, "id": "549a8a4a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/33/p_3l01b14g96rn22vzwly2g00000gn/T/ipykernel_64253/3710906870.py:10: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['num_user_reviews'][i] = float(a)*1000\n", "/var/folders/33/p_3l01b14g96rn22vzwly2g00000gn/T/ipykernel_64253/3710906870.py:13: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['num_user_reviews'][i] = df['num_user_reviews'][i]\n" ] } ], "source": [ "movies_df_v2 = drop_k(movies_df_v2)" ] }, { "cell_type": "code", "execution_count": 16, "id": "2543ee87", "metadata": {}, "outputs": [], "source": [ "movies_df_v2['num_critic_reviews'] = pd.to_numeric(movies_df_v2['num_critic_reviews'], errors='coerce')" ] }, { "cell_type": "code", "execution_count": null, "id": "f1897392", "metadata": {}, "outputs": [], "source": [ "movies_df_v2['release_date'].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 18, "id": "b802f866", "metadata": {}, "outputs": [], "source": [ "movies_df_v2['release_date']= movies_df_v2['release_date'].replace({\n", " '2022':'',\n", " '2023':''\n", "})" ] }, { "cell_type": "code", "execution_count": 19, "id": "6623ce1b", "metadata": {}, "outputs": [], "source": [ 
"# Unparseable dates (including the '' placeholders) become NaT.\n", "movies_df_v2['release_date'] = pd.to_datetime(movies_df_v2['release_date'], errors='coerce')" ] }, { "cell_type": "code", "execution_count": null, "id": "84460bf9", "metadata": {}, "outputs": [], "source": [ "movies_df_v2['release_date'].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 21, "id": "f0846511", "metadata": {}, "outputs": [], "source": [
  "import datetime as dt\n",
  "\n",
  "# ISO weekday number: 1 = Monday ... 7 = Sunday, so 5 is a Friday.\n",
  "movies_df_v2['release_weekday'] = movies_df_v2['release_date'].dt.isocalendar()['day']\n"
] }, { "cell_type": "code", "execution_count": 22, "id": "c2e636c1", "metadata": {}, "outputs": [], "source": [
  "# The release_month column that came with the data holds more nonsensical values,\n",
  "# so it is dropped here and rebuilt from the parsed release_date in the next cell.\n",
  "movies_df_v2 = movies_df_v2.drop(columns=['release_month'])\n"
] }, { "cell_type": "code", "execution_count": 23, "id": "74f4336e", "metadata": {}, "outputs": [], "source": [
  "# Month number taken from the parsed date; NaN where the date is missing.\n",
  "movies_df_v2['release_month'] = movies_df_v2['release_date'].dt.month\n"
] }, { "cell_type": "code", "execution_count": 24, "id": "a4241d70", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9.0 57\n", "8.0 54\n", "10.0 54\n", "2.0 49\n", "3.0 45\n", "6.0 41\n", "7.0 40\n", "4.0 40\n", "1.0 39\n", "5.0 32\n", "11.0 14\n", "12.0 7\n", "NaN 5\n", "Name: release_month, dtype: int64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_df_v2['release_month'].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 25, "id": "10eb3ce8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2022 477\n", "Name: release_year, dtype: int64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_df_v2['release_year'].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 26, "id": "37a53771", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "United States 426\n", "United Kingdom 
19\n", "Canada 4\n", "Ireland 4\n", "Australia 3\n", "Japan 2\n", "China 2\n", "Italy 2\n", "Spain 2\n", "Germany 2\n", "Mexico 2\n", "United Arab Emirates 1\n", "Puerto Rico 1\n", "Portugal 1\n", "France 1\n", "South Korea 1\n", "Poland 1\n", "Morocco 1\n", "Hungary 1\n", "Switzerland 1\n", "Name: country_of_origin, dtype: int64" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_df_v2['country_of_origin'].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "5f0ad2a5", "metadata": {}, "outputs": [], "source": [ "movies_df_v2['metascore'].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 28, "id": "26360c9f", "metadata": {}, "outputs": [], "source": [ "movies_df_v2['metascore']= movies_df_v2['metascore'].replace({\n", " '[]':'Not scored' \n", "})\n" ] }, { "cell_type": "code", "execution_count": 29, "id": "9e9e0181", "metadata": {}, "outputs": [], "source": [
  "# Rates to USD for the currency markers left in a budget string once '$' is removed.\n",
  "# None means the marker is only stripped: the original note says no conversion was\n",
  "# needed for the euro at the time of writing.\n",
  "USD_RATES = {\n",
  "    '€': None,\n",
  "    'CA': 0.73,\n",
  "    '₹': 0.012,\n",
  "    '£': 1.14,\n",
  "    'CN¥': 0.0067,\n",
  "    'RUR': 0.016,\n",
  "}\n",
  "\n",
  "\n",
  "def remove_currency(row, usd_rates=None):\n",
  "    \"\"\"Strip the currency marker from a budget string and convert the amount to USD.\n",
  "\n",
  "    Non-string values (e.g. NaN for a missing budget) are returned unchanged.\n",
  "    Strings come back as strings: the bare amount for '$' and '€' values, or the\n",
  "    converted amount for the other markers. `usd_rates` maps marker -> rate and\n",
  "    defaults to USD_RATES; the first marker found in the string wins.\n",
  "    \"\"\"\n",
  "    # isinstance instead of `type(row) != float`, so ints pass through instead of\n",
  "    # raising AttributeError on .replace().\n",
  "    if not isinstance(row, str):\n",
  "        return row\n",
  "    rates = USD_RATES if usd_rates is None else usd_rates\n",
  "    # Thousands separators would make the numeric conversion fail, so drop them too.\n",
  "    amount = row.replace('$', '').replace(',', '').strip()\n",
  "    for marker, rate in rates.items():\n",
  "        if marker in amount:\n",
  "            amount = amount.replace(marker, '').strip()\n",
  "            # float() rather than int() so amounts with decimals do not raise.\n",
  "            return amount if rate is None else str(float(amount) * rate)\n",
  "    return amount\n",
  "\n",
  "movies_df_v2['budget_in_usd'] = movies_df_v2['budget_in_usd'].apply(remove_currency)"
] }, { "cell_type": "code", "execution_count": 30, "id": "3aaedee0", "metadata": {}, "outputs": [], "source": [
  "def remove_nonsensical(row):\n",
  "    \"\"\"Return NaN for a string value containing ':'; return anything else unchanged.\"\"\"\n",
  "    # Only strings are searched, so ints pass through instead of raising TypeError\n",
  "    # on `':' in row`.\n",
  "    if isinstance(row, str) and ':' in row:\n",
  "        return np.nan\n",
  "    return row"
] }, { "cell_type": "code", "execution_count": 31, "id": "c6efe99c", "metadata": {}, "outputs": [], "source": [ "movies_df_v2['opening_weekend_us_can_in_usd'] = movies_df_v2['opening_weekend_us_can_in_usd'].apply(remove_nonsensical)" ] }, { "cell_type": "code", "execution_count": 32, "id": "e73c6f7e", "metadata": {}, "outputs": [], "source": [ "movies_df_v2['gross_us_can_in_usd'] = movies_df_v2['gross_us_can_in_usd'].apply(remove_nonsensical)" ] }, { "cell_type": "code", "execution_count": 33, "id": "9cc752b8", "metadata": {}, "outputs": [], "source": [ "movies_df_v2['gross_worldwide_in_usd'] = movies_df_v2['gross_worldwide_in_usd'].apply(remove_nonsensical)\n" ] }, { "cell_type": "code", "execution_count": 34, "id": "d8320a8a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "movie_id 0\n", "movie_imdb_link 0\n", "certificate 0\n", "runtime_in_mins 0\n", "genre 0\n", "imdb_rating 0\n", "number_of_votes 0\n", "metascore 0\n", "top_director 0\n", "release_year 0\n", "awards_link 0\n", "awards_total 0\n", "top_writer 0\n", "top_star_1 0\n", "top_star_2 0\n", "top_star_3 0\n", "num_user_reviews 0\n", "num_critic_reviews 20\n", "release_date 5\n", "country_of_origin 0\n", "top_production_company 0\n", "budget_in_usd 0\n", "opening_weekend_us_can_in_usd 136\n", "gross_us_can_in_usd 4\n", "gross_worldwide_in_usd 97\n", "total_award_nominations 0\n", "total_award_wins 0\n", "primary_genre 0\n", "secondary_genre 0\n", "release_weekday 5\n", "release_month 5\n", "dtype: int64" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_df_v2.isna().sum()" ] }, { "cell_type": "code", "execution_count": 35, "id": "a0d3ae7c", "metadata": {}, "outputs": [], "source": [ "movies_df_v2.replace('',np.nan, inplace=True)\n", "movies_df_v2.replace('[]',np.nan, inplace=True)" ] }, { "cell_type": "code", "execution_count": 36, "id": "c7b42308", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "movie_id 0\n", 
"movie_imdb_link 0\n", "certificate 124\n", "runtime_in_mins 2\n", "genre 0\n", "imdb_rating 0\n", "number_of_votes 0\n", "metascore 0\n", "top_director 0\n", "release_year 0\n", "awards_link 0\n", "awards_total 0\n", "top_writer 1\n", "top_star_1 3\n", "top_star_2 3\n", "top_star_3 3\n", "num_user_reviews 0\n", "num_critic_reviews 20\n", "release_date 5\n", "country_of_origin 0\n", "top_production_company 270\n", "budget_in_usd 258\n", "opening_weekend_us_can_in_usd 370\n", "gross_us_can_in_usd 359\n", "gross_worldwide_in_usd 278\n", "total_award_nominations 0\n", "total_award_wins 0\n", "primary_genre 0\n", "secondary_genre 0\n", "release_weekday 5\n", "release_month 5\n", "dtype: int64" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_df_v2.isna().sum()" ] }, { "cell_type": "code", "execution_count": 37, "id": "9f1f2f8b", "metadata": {}, "outputs": [], "source": [
  "cols_numeric = [\n",
  "    'runtime_in_mins', 'num_user_reviews', 'metascore', 'budget_in_usd',\n",
  "    'opening_weekend_us_can_in_usd', 'gross_us_can_in_usd', 'gross_worldwide_in_usd',\n",
  "    'total_award_nominations', 'total_award_wins',\n",
  "]\n",
  "\n",
  "# Convert column by column (the default axis) instead of row by row with axis=1:\n",
  "# the coerced values are the same, without one pd.to_numeric call per row.\n",
  "# astype(float) keeps every column float64, matching the floats the row-wise\n",
  "# version displayed (e.g. award counts shown as 0.0).\n",
  "movies_df_v2[cols_numeric] = (\n",
  "    movies_df_v2[cols_numeric]\n",
  "    .apply(pd.to_numeric, errors='coerce')\n",
  "    .astype(float)\n",
  ")\n"
] }, { "cell_type": "code", "execution_count": 38, "id": "a6c10a55", "metadata": {}, "outputs": [], "source": [ "cols_object = ['release_year','release_month', 'release_weekday']\n", "\n", "movies_df_v2[cols_object] = movies_df_v2[cols_object].astype(object)" ] }, { "cell_type": "code", "execution_count": 39, "id": "d2cfb3e8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "movie_id 0\n", "movie_imdb_link 0\n", "certificate 124\n", "runtime_in_mins 2\n", "genre 0\n", "imdb_rating 0\n", "number_of_votes 0\n", "metascore 223\n", "top_director 0\n", "release_year 0\n", "awards_link 0\n", "awards_total 0\n", "top_writer 1\n", "top_star_1 3\n", "top_star_2 3\n", "top_star_3 3\n", "num_user_reviews 0\n", 
"num_critic_reviews 20\n", "release_date 5\n", "country_of_origin 0\n", "top_production_company 270\n", "budget_in_usd 258\n", "opening_weekend_us_can_in_usd 370\n", "gross_us_can_in_usd 359\n", "gross_worldwide_in_usd 425\n", "total_award_nominations 0\n", "total_award_wins 0\n", "primary_genre 0\n", "secondary_genre 0\n", "release_weekday 5\n", "release_month 5\n", "dtype: int64" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_df_v2.isna().sum()" ] }, { "cell_type": "markdown", "id": "991a52b5", "metadata": {}, "source": [ "# Feature engineering" ] }, { "cell_type": "markdown", "id": "2e1917a5", "metadata": {}, "source": [ "## Oscars proved to be useless for the model." ] }, { "cell_type": "markdown", "id": "b42e90d1", "metadata": {}, "source": [ "## Director" ] }, { "cell_type": "code", "execution_count": 43, "id": "737e949d", "metadata": {}, "outputs": [], "source": [
  "# One row per distinct director in the top-1000 list, sorted by name.\n",
  "# Selecting the Director column directly replaces a pivot_table(count) followed by\n",
  "# a hard-coded drop of every other column, which raised KeyError if the CSV lacked\n",
  "# a listed column and left stray columns behind if it gained one.\n",
  "top_1000_movies_copy = top_1000_movies.copy()\n",
  "best_directors = (\n",
  "    top_1000_movies_copy[['Director']]\n",
  "    .dropna()\n",
  "    .drop_duplicates()\n",
  "    .sort_values('Director')\n",
  "    .reset_index(drop=True)\n",
  ")\n"
] }, { "cell_type": "code", "execution_count": 44, "id": "960e61a2", "metadata": {}, "outputs": [], "source": [
  "best_directors = (\n",
  "    best_directors\n",
  "    .rename(columns={'Director': 'top_director'})  # same name as the director column in movies_df_v2\n",
  "    .assign(is_among_best_director='Y')  # every director in this frame gets the flag\n",
  ")"
] }, { "cell_type": "code", "execution_count": 45, "id": "54907d9c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(548, 2)" 
] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "best_directors.shape\n" ] }, { "cell_type": "code", "execution_count": 46, "id": "900227e3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(477, 31)" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_df_v2.shape" ] }, { "cell_type": "code", "execution_count": 47, "id": "296f8e01", "metadata": {}, "outputs": [], "source": [
  "# Left join keeps every movie; is_among_best_director is 'Y' on a match, NaN otherwise.\n",
  "# validate= makes the merge fail loudly if a director appears twice in\n",
  "# best_directors, which would otherwise silently duplicate movie rows.\n",
  "movies_df_v10 = pd.merge(\n",
  "    movies_df_v2,\n",
  "    best_directors,\n",
  "    how='left',\n",
  "    on='top_director',\n",
  "    validate='many_to_one',\n",
  ")"
] }, { "cell_type": "code", "execution_count": 48, "id": "dad81a79", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | movie_id | \n", "movie_imdb_link | \n", "certificate | \n", "runtime_in_mins | \n", "genre | \n", "imdb_rating | \n", "number_of_votes | \n", "metascore | \n", "top_director | \n", "release_year | \n", "awards_link | \n", "awards_total | \n", "top_writer | \n", "top_star_1 | \n", "top_star_2 | \n", "top_star_3 | \n", "num_user_reviews | \n", "num_critic_reviews | \n", "release_date | \n", "country_of_origin | \n", "top_production_company | \n", "budget_in_usd | \n", "opening_weekend_us_can_in_usd | \n", "gross_us_can_in_usd | \n", "gross_worldwide_in_usd | \n", "total_award_nominations | \n", "total_award_wins | \n", "primary_genre | \n", "secondary_genre | \n", "release_weekday | \n", "release_month | \n", "is_among_best_director | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
467 | \n", "tt13429928 | \n", "https://www.imdb.com/title/tt13429928/?ref_=ad... | \n", "Not Rated | \n", "87.0 | \n", "Action, Adventure, History | \n", "3.2 | \n", "210.0 | \n", "NaN | \n", "Steven Luke | \n", "2022 | \n", "https://www.imdb.com/title/tt13429928/awards/?... | \n", "0 | \n", "Steven Luke | \n", "Hiram A. Murray | \n", "Andrew Stecker | \n", "Apostolos Gliarmis | \n", "11.0 | \n", "5.0 | \n", "2022-10-06 | \n", "United States | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "Action | \n", "Adventure | \n", "4 | \n", "10.0 | \n", "NaN | \n", "
468 | \n", "tt11939970 | \n", "https://www.imdb.com/title/tt11939970/?ref_=ad... | \n", "NaN | \n", "80.0 | \n", "Drama, Horror, Mystery | \n", "2.4 | \n", "209.0 | \n", "NaN | \n", "Kameron Hale | \n", "2022 | \n", "https://www.imdb.com/title/tt11939970/awards/?... | \n", "Showing all 1 win and 3 nominations | \n", "Scott Hale | \n", "Miranda Nieman | \n", "Hayley Sunshine | \n", "Scott Hale | \n", "22.0 | \n", "7.0 | \n", "2022-01-18 | \n", "United States | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "3.0 | \n", "1.0 | \n", "Drama | \n", "Horror | \n", "2 | \n", "1.0 | \n", "NaN | \n", "
469 | \n", "tt12907932 | \n", "https://www.imdb.com/title/tt12907932/?ref_=ad... | \n", "NaN | \n", "82.0 | \n", "Horror, Thriller | \n", "2.5 | \n", "208.0 | \n", "NaN | \n", "Kipp Tribble | \n", "2022 | \n", "https://www.imdb.com/title/tt12907932/awards/?... | \n", "0 | \n", "Kipp Tribble | \n", "Andi Sweeney Blanco | \n", "Richard Siegelman | \n", "Kipp Tribble | \n", "10.0 | \n", "11.0 | \n", "2022-01-21 | \n", "United States | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "Horror | \n", "Thriller | \n", "5 | \n", "1.0 | \n", "NaN | \n", "
470 | \n", "tt9093076 | \n", "https://www.imdb.com/title/tt9093076/?ref_=adv... | \n", "NaN | \n", "72.0 | \n", "Horror | \n", "3.7 | \n", "207.0 | \n", "NaN | \n", "Kurtis Spieler | \n", "2022 | \n", "https://www.imdb.com/title/tt9093076/awards/?r... | \n", "0 | \n", "Kurtis Spieler | \n", "Laura Dooling | \n", "Adrienne King | \n", "Frank Wihbey | \n", "13.0 | \n", "15.0 | \n", "2022-08-09 | \n", "United States | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "Horror | \n", "Other | \n", "2 | \n", "8.0 | \n", "NaN | \n", "
471 | \n", "tt14555908 | \n", "https://www.imdb.com/title/tt14555908/?ref_=ad... | \n", "NaN | \n", "86.0 | \n", "Comedy | \n", "5.3 | \n", "205.0 | \n", "NaN | \n", "Andrew Nackman | \n", "2022 | \n", "https://www.imdb.com/title/tt14555908/awards/?... | \n", "Showing all 0 wins and 1 nomination | \n", "Jake Greene | \n", "Ethan Dizon | \n", "Madison Wolfe | \n", "Bernard White | \n", "9.0 | \n", "10.0 | \n", "2022-05-24 | \n", "United States | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "0.0 | \n", "Comedy | \n", "Other | \n", "2 | \n", "5.0 | \n", "NaN | \n", "
472 | \n", "tt19511880 | \n", "https://www.imdb.com/title/tt19511880/?ref_=ad... | \n", "NaN | \n", "NaN | \n", "Reality-TV | \n", "9.1 | \n", "204.0 | \n", "NaN | \n", "Diane Paloma Eskenazi | \n", "2022 | \n", "https://www.imdb.com/title/tt19511880/awards/?... | \n", "0 | \n", "Nino Dalakishvili | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "NaN | \n", "2022-05-01 | \n", "United States | \n", "NaN | \n", "50000.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "Reality | \n", "TV | \n", "7 | \n", "5.0 | \n", "NaN | \n", "
473 | \n", "tt10696116 | \n", "https://www.imdb.com/title/tt10696116/?ref_=ad... | \n", "NaN | \n", "92.0 | \n", "Horror | \n", "7.9 | \n", "204.0 | \n", "NaN | \n", "John Ainslie | \n", "2022 | \n", "https://www.imdb.com/title/tt10696116/awards/?... | \n", "0 | \n", "John Ainslie | \n", "Kimberly Laferriere | \n", "Rogan Christopher | \n", "Janet Porter | \n", "3.0 | \n", "26.0 | \n", "2022-08-19 | \n", "United States | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "Horror | \n", "Other | \n", "5 | \n", "8.0 | \n", "NaN | \n", "
474 | \n", "tt3447590 | \n", "https://www.imdb.com/title/tt3447590/?ref_=adv... | \n", "Parental Guidance | \n", "117.0 | \n", "Comedy, Drama, Family | \n", "6.1 | \n", "201.0 | \n", "67.0 | \n", "Matthew Warchus | \n", "2022 | \n", "https://www.imdb.com/title/tt3447590/awards/?r... | \n", "0 | \n", "Roald Dahl | \n", "Stephen Graham | \n", "Emma Thompson | \n", "Andrea Riseborough | \n", "15.0 | \n", "15.0 | \n", "2022-12-25 | \n", "United Kingdom | \n", "Working Title Films | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "Comedy | \n", "Drama | \n", "7 | \n", "12.0 | \n", "Y | \n", "
475 | \n", "tt20861742 | \n", "https://www.imdb.com/title/tt20861742/?ref_=ad... | \n", "NaN | \n", "78.0 | \n", "Horror | \n", "4.5 | \n", "200.0 | \n", "NaN | \n", "Brendan Rudnicki | \n", "2022 | \n", "https://www.imdb.com/title/tt20861742/awards/?... | \n", "Showing all 1 win and 4 nominations | \n", "Brendan Rudnicki | \n", "Walter Braithwaite | \n", "Dylan DeVane | \n", "Brent Downs | \n", "101.0 | \n", "4.0 | \n", "2022-07-29 | \n", "United States | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.0 | \n", "1.0 | \n", "Horror | \n", "Other | \n", "5 | \n", "7.0 | \n", "NaN | \n", "
476 | \n", "tt12885770 | \n", "https://www.imdb.com/title/tt12885770/?ref_=ad... | \n", "Adult | \n", "90.0 | \n", "Horror, Sci-Fi, Thriller | \n", "2.2 | \n", "200.0 | \n", "NaN | \n", "Lance Kawas | \n", "2022 | \n", "https://www.imdb.com/title/tt12885770/awards/?... | \n", "0 | \n", "Lance Kawas | \n", "Brande Roderick | \n", "Donald Cerrone | \n", "Kelly Lynn Reiter | \n", "9.0 | \n", "3.0 | \n", "2022-10-11 | \n", "United States | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "Horror | \n", "Sci | \n", "2 | \n", "10.0 | \n", "NaN | \n", "