import ast
import json
import os
import sys

import matplotlib.pyplot as plt
import pandas as pd

# Base name of the file this module was loaded from (extension included);
# it is used as the prefix of every figure saved by questions 11-13.
studentid = os.path.basename(sys.modules[__name__].__file__)


#################################################
# Your personal methods can be here ...
#################################################


def log(question, output_df, other):
    """
    Print a short progress report for one question.

    A banner containing the question label is always printed. When `other`
    is not None it is printed on its own line after the label, and when
    `output_df` is not None its first five rows are printed as text.

    :param question: label of the question, e.g. "QUESTION 1"
    :param output_df: dataframe to preview, or None to skip the preview
    :param other: any extra value to print, or None to skip it
    :return: nothing
    """
    banner = "--------------- {}----------------".format(question)
    print(banner)

    if other is not None:
        print(question, other)

    if output_df is not None:
        preview = output_df.head(5)
        print(preview.to_string())


def question_1(movies, credits):
    """
    Build the dataframe for question 1 from the two CSV files.

    The marked region must assign `df1` before it is logged and returned;
    the construction rules are given in the assignment specs.

    :param movies: the path for the movie.csv file
    :param credits: the path for the credits.csv file
    :return: df1
            Data Type: Dataframe
    """

    #################################################
    # Your code goes here ...
    #################################################

    shape = df1.shape
    log("QUESTION 1", output_df=df1, other=shape)
    return df1


def question_2(df1):
    """
    Build the dataframe for question 2.

    The marked region must assign `df2`; the number of columns and the
    sorted column names are logged. See the assignment specs for the rules.

    :param df1: the dataframe created in question 1
    :return: df2
            Data Type: Dataframe
    """

    #################################################
    # Your code goes here ...
    #################################################

    columns = df2.columns
    column_summary = (len(columns), sorted(columns))
    log("QUESTION 2", output_df=df2, other=column_summary)
    return df2


def question_3(df2):
    """
    Build the dataframe for question 3.

    The marked region must assign `df3`; the name of its index is logged.
    See the assignment specs for the rules.

    :param df2: the dataframe created in question 2
    :return: df3
            Data Type: Dataframe
    """

    #################################################
    # Your code goes here ...
    #################################################

    index_name = df3.index.name
    log("QUESTION 3", output_df=df3, other=index_name)
    return df3


def question_4(df3):
    """
    Build the dataframe for question 4.

    The marked region must assign `df4`; the min, max and mean of its
    'budget' column are logged. See the assignment specs for the rules.

    :param df3: the dataframe created in question 3
    :return: df4
            Data Type: Dataframe
    """

    #################################################
    # Your code goes here ...
    #################################################

    budget = df4["budget"]
    budget_stats = (budget.min(), budget.max(), budget.mean())
    log("QUESTION 4", output_df=df4, other=budget_stats)
    return df4


def question_5(df4):
    """
    Build the dataframe for question 5.

    The marked region must assign `df5`; the min, max and mean of its
    'success_impact' column are logged. See the assignment specs for the
    rules.

    :param df4: the dataframe created in question 4
    :return: df5
            Data Type: Dataframe
    """

    #################################################
    # Your code goes here ...
    #################################################

    impact = df5["success_impact"]
    impact_stats = (impact.min(), impact.max(), impact.mean())
    log("QUESTION 5", output_df=df5, other=impact_stats)
    return df5


def question_6(df5):
    """
    Build the dataframe for question 6.

    The marked region must assign `df6`; the min, max and mean of its
    'popularity' column are logged. See the assignment specs for the rules.

    :param df5: the dataframe created in question 5
    :return: df6
            Data Type: Dataframe
    """

    #################################################
    # Your code goes here ...
    #################################################

    popularity = df6["popularity"]
    popularity_stats = (popularity.min(), popularity.max(), popularity.mean())
    log("QUESTION 6", output_df=df6, other=popularity_stats)
    return df6


def question_7(df6):
    """
    Build the dataframe for question 7.

    The marked region must assign `df7`; the dtype of its 'popularity'
    column is logged. See the assignment specs for the rules.

    :param df6: the dataframe created in question 6
    :return: df7
            Data Type: Dataframe
    """

    #################################################
    # Your code goes here ...
    #################################################

    popularity_dtype = df7["popularity"].dtype
    log("QUESTION 7", output_df=df7, other=popularity_dtype)
    return df7


def question_8(df7):
    """
    Build the dataframe for question 8.

    The marked region must assign `df8`; the first ten values of its 'cast'
    column are logged. See the assignment specs for the rules.

    :param df7: the dataframe created in question 7
    :return: df8
            Data Type: Dataframe
    """

    #################################################
    # Your code goes here ...
    #################################################

    first_casts = df8["cast"].head(10).values
    log("QUESTION 8", output_df=df8, other=first_casts)
    return df8


def question_9(df8):
    """
    Produce the list of movie titles for question 9.

    The marked region must assign `movies`; the list itself is logged (no
    dataframe preview). See the assignment specs for the rules.

    :param df8: the dataframe created in question 8
    :return: movies
            Data Type: List of strings (movie titles)
    """

    #################################################
    # Your code goes here ...
    #################################################

    log("QUESTION 9", output_df=None, other=movies)
    return movies


def question_10(df8):
    """
    Build the dataframe for question 10.

    The marked region must assign `df10`; the first five 'release_date'
    entries are logged on a single line. See the assignment specs for the
    rules.

    :param df8: the dataframe created in question 8
    :return: df10
            Data Type: Dataframe
    """

    #################################################
    # Your code goes here ...
    #################################################

    first_dates = df10["release_date"].head(5)
    # Collapse the multi-line rendering so the log entry stays on one line.
    dates_on_one_line = first_dates.to_string().replace("\n", " ")
    log("QUESTION 10", output_df=df10, other=dates_on_one_line)
    return df10


def question_11(df10):
    """
    Draw the figure for question 11 and save it as "<studentid>-Q11.png".

    :param df10: the dataframe created in question 10
    :return: nothing, but saves the figure on the disk
    """

    #################################################
    # Your code goes here ...
    #################################################

    figure_path = "{}-Q11.png".format(studentid)
    plt.savefig(figure_path)


def question_12(df10):
    """
    Draw the figure for question 12 and save it as "<studentid>-Q12.png".

    :param df10: the dataframe created in question 10
    :return: nothing, but saves the figure on the disk
    """

    #################################################
    # Your code goes here ...
    #################################################

    figure_path = "{}-Q12.png".format(studentid)
    plt.savefig(figure_path)


def question_13(df10):
    """
    Draw the figure for question 13 and save it as "<studentid>-Q13.png".

    :param df10: the dataframe created in question 10
    :return: nothing, but saves the figure on the disk
    """

    #################################################
    # Your code goes here ...
    #################################################

    figure_path = "{}-Q13.png".format(studentid)
    plt.savefig(figure_path)


if __name__ == "__main__":
    # Each question consumes the result of an earlier one, so the calls
    # must run in this order.
    df1 = question_1("movies.csv", "credits.csv")
    df2 = question_2(df1)
    df3 = question_3(df2)
    df4 = question_4(df3)
    df5 = question_5(df4)
    df6 = question_6(df5)
    df7 = question_7(df6)
    df8 = question_8(df7)
    # Questions 9 and 10 both start from the question 8 dataframe.
    movies = question_9(df8)
    df10 = question_10(df8)
    # The three figures are all drawn from the question 10 dataframe.
    question_11(df10)
    question_12(df10)
    question_13(df10)