{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Introduction to the data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.read_csv(\"academy_awards.csv\", encoding=\"ISO-8859-1\")\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Filtering the data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df[\"Year\"] = df[\"Year\"].str[0:4]\n", "df[\"Year\"] = df[\"Year\"].astype(\"int64\")\n", "later_than_2000 = df[df[\"Year\"] > 2000]\n", "award_categories = [\"Actor -- Leading Role\",\"Actor -- Supporting Role\", \"Actress -- Leading Role\", \"Actress -- Supporting Role\"]\n", "nominations = later_than_2000[later_than_2000[\"Category\"].isin(award_categories)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Cleaning up the Won? and Unnamed columns" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "replacements = { \"NO\": 0, \"YES\": 1 }\n", "nominations[\"Won?\"] = nominations[\"Won?\"].map(replacements)\n", "nominations[\"Won\"] = nominations[\"Won?\"]\n", "drop_cols = [\"Won?\",\"Unnamed: 5\", \"Unnamed: 6\",\"Unnamed: 7\", \"Unnamed: 8\", \"Unnamed: 9\", \"Unnamed: 10\"]\n", "final_nominations = nominations.drop(drop_cols, axis=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Cleaning up the Additional Info column" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "additional_info_one = final_nominations[\"Additional Info\"].str.rstrip(\"'}\")\n", "additional_info_two = additional_info_one.str.split(\" {'\")\n", "movie_names = additional_info_two.str[0]\n", "characters = additional_info_two.str[1]\n", "final_nominations[\"Movie\"] = movie_names\n", "final_nominations[\"Character\"] = characters\n", "final_nominations = final_nominations.drop(\"Additional Info\", axis=1)\n", "final_nominations" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Exporting to SQLite" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import sqlite3\n", "conn = sqlite3.connect(\"nominations.db\")\n", "final_nominations.to_sql(\"nominations\", conn, index=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Verifying in SQL" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "query_one = \"pragma table_info(nominations);\"\n", "query_two = \"select * from nominations limit 10;\"\n", "print(conn.execute(query_one).fetchall())\n", "print(conn.execute(query_two).fetchall())\n", "conn.close()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.0" } }, "nbformat": 4, "nbformat_minor": 0 }