{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prince Grover"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy import stats, integrate\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The data used here come from the [CORGIS Dataset Project](https://think.cs.vt.edu/corgis/csv/state_crime/state_crime.html)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the state-crime CSV into a DataFrame (path is relative to the working directory)\n",
    "state_crime = pd.read_csv('state_crime.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2751, 21)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the loaded frame\n",
    "state_crime.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Population 0\n",
       "Rates.Property.All 0\n",
       "Rates.Property.Burglary 0\n",
       "Rates.Property.Larceny 0\n",
       "Rates.Property.Motor 0\n",
       "Rates.Violent.All 0\n",
       "Rates.Violent.Assault 0\n",
       "Rates.Violent.Murder 0\n",
       "Rates.Violent.Rape 0\n",
       "Rates.Violent.Robbery 0\n",
       "State 0\n",
       "Totals.Property.All 0\n",
       "Totals.Property.Burglary 0\n",
       "Totals.Property.Larceny 0\n",
       "Totals.Property.Motor 0\n",
       "Totals.Violent.All 0\n",
       "Totals.Violent.Assault 0\n",
       "Totals.Violent.Murder 0\n",
       "Totals.Violent.Rape 0\n",
       "Totals.Violent.Robbery 0\n",
       "Year 0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count missing values in each column\n",
    "state_crime.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Population 2519\n",
       "Rates.Property.All 2665\n",
       "Rates.Property.Burglary 2479\n",
       "Rates.Property.Larceny 2639\n",
       "Rates.Property.Motor 2244\n",
       "Rates.Violent.All 2333\n",
       "Rates.Violent.Assault 2119\n",
       "Rates.Violent.Murder 217\n",
       "Rates.Violent.Rape 624\n",
       "Rates.Violent.Robbery 1707\n",
       "State 52\n",
       "Totals.Property.All 2735\n",
       "Totals.Property.Burglary 2704\n",
       "Totals.Property.Larceny 2730\n",
       "Totals.Property.Motor 2603\n",
       "Totals.Violent.All 2631\n",
       "Totals.Violent.Assault 2541\n",
       "Totals.Violent.Murder 938\n",
       "Totals.Violent.Rape 1723\n",
       "Totals.Violent.Robbery 2300\n",
       "Year 53\n",
       "dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count distinct values in each column\n",
    "state_crime.nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | Population | \n", "Rates.Property.All | \n", "Rates.Property.Burglary | \n", "Rates.Property.Larceny | \n", "Rates.Property.Motor | \n", "Rates.Violent.All | \n", "Rates.Violent.Assault | \n", "Rates.Violent.Murder | \n", "Rates.Violent.Rape | \n", "Rates.Violent.Robbery | \n", "... | \n", "Totals.Property.All | \n", "Totals.Property.Burglary | \n", "Totals.Property.Larceny | \n", "Totals.Property.Motor | \n", "Totals.Violent.All | \n", "Totals.Violent.Assault | \n", "Totals.Violent.Murder | \n", "Totals.Violent.Rape | \n", "Totals.Violent.Robbery | \n", "Year | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "3266740 | \n", "1035.4 | \n", "355.9 | \n", "592.1 | \n", "87.3 | \n", "186.6 | \n", "138.1 | \n", "12.4 | \n", "8.6 | \n", "27.5 | \n", "... | \n", "33823 | \n", "11626 | \n", "19344 | \n", "2853 | \n", "6097 | \n", "4512 | \n", "406 | \n", "281 | \n", "898 | \n", "1960 | \n", "
1 | \n", "3302000 | \n", "985.5 | \n", "339.3 | \n", "569.4 | \n", "76.8 | \n", "168.5 | \n", "128.9 | \n", "12.9 | \n", "7.6 | \n", "19.1 | \n", "... | \n", "32541 | \n", "11205 | \n", "18801 | \n", "2535 | \n", "5564 | \n", "4255 | \n", "427 | \n", "252 | \n", "630 | \n", "1961 | \n", "
2 | \n", "3358000 | \n", "1067.0 | \n", "349.1 | \n", "634.5 | \n", "83.4 | \n", "157.3 | \n", "119.0 | \n", "9.4 | \n", "6.5 | \n", "22.5 | \n", "... | \n", "35829 | \n", "11722 | \n", "21306 | \n", "2801 | \n", "5283 | \n", "3995 | \n", "316 | \n", "218 | \n", "754 | \n", "1962 | \n", "
3 | \n", "3347000 | \n", "1150.9 | \n", "376.9 | \n", "683.4 | \n", "90.6 | \n", "182.7 | \n", "142.1 | \n", "10.2 | \n", "5.7 | \n", "24.7 | \n", "... | \n", "38521 | \n", "12614 | \n", "22874 | \n", "3033 | \n", "6115 | \n", "4755 | \n", "340 | \n", "192 | \n", "828 | \n", "1963 | \n", "
4 | \n", "3407000 | \n", "1358.7 | \n", "466.6 | \n", "784.1 | \n", "108.0 | \n", "213.1 | \n", "163.0 | \n", "9.3 | \n", "11.7 | \n", "29.1 | \n", "... | \n", "46290 | \n", "15898 | \n", "26713 | \n", "3679 | \n", "7260 | \n", "5555 | \n", "316 | \n", "397 | \n", "992 | \n", "1964 | \n", "
5 rows × 21 columns
\n", "old_new | \n", "new | \n", "old | \n", "
---|---|---|
State | \n", "\n", " | \n", " |
California | \n", "12 | \n", "41 | \n", "
Florida | \n", "12 | \n", "41 | \n", "
New York | \n", "12 | \n", "36 | \n", "
Texas | \n", "12 | \n", "41 | \n", "