{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import missingno as msno\n",
    "from sklearn.impute import SimpleImputer\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Column-name -> dtype mapping, passed to pd.read_csv when the dataset is loaded.\n",
    "# The encoding is pinned because open() otherwise falls back to the platform\n",
    "# locale, whereas JSON text is expected to be UTF-8.\n",
    "with open('../dtypes.json', 'r', encoding='utf-8') as jsonfile:\n",
    "    dtyp = json.load(jsonfile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'float',\n",
       " 'flag_tsunami': 'str',\n",
       " 'year': 'float',\n",
       " 'month': 'float',\n",
       " 'day': 'float',\n",
       " 'hour': 'float',\n",
       " 'minute': 'float',\n",
       " 'second': 'float',\n",
       " 'focal_depth': 'float',\n",
       " 'eq_primary': 'float',\n",
       " 'eq_mag_mw': 'float',\n",
       " 'eq_mag_ms': 'float',\n",
       " 'eq_mag_mb': 'float',\n",
       " 'intensity': 'float',\n",
       " 'country': 'str',\n",
       " 'state': 'str',\n",
       " 'location_name': 'str',\n",
       " 'latitude': 'float',\n",
       " 'longitude': 'float',\n",
       " 'region_code': 'str',\n",
       " 'injuries': 'float',\n",
       " 'injuries_description': 'str',\n",
       " 'damage_millions_dollars': 'float',\n",
       " 'damage_description': 'str',\n",
       " 'total_injuries': 'float',\n",
       " 'total_injuries_description': 'str',\n",
       " 'total_damage_millions_dollars': 'float',\n",
       " 'total_damage_description': 'str'}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtyp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the earthquake CSV, forcing the per-column dtypes listed in dtyp.\n",
    "data = pd.read_csv('../Datasets/earthquake_data.csv', dtype=dtyp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The four *_description columns (read as 'str' per dtyp) whose missing\n",
    "# entries are filled with a placeholder by the imputer cells that follow.\n",
    "description_features = [\n",
    "    'injuries_description',\n",
    "    'damage_description',\n",
    "    'total_injuries_description',\n",
    "    'total_damage_description',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Constant-fill imputer: every NaN entry is replaced by the literal string 'NA'.\n",
    "imp = SimpleImputer(\n",
    "    missing_values=np.nan,\n",
    "    strategy='constant',\n",
    "    fill_value='NA',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit and apply the imputer to the description columns and write the filled\n",
    "# values back over those same columns. NOTE: this overwrites `data` in place,\n",
    "# so the unfilled columns are only recoverable by re-running the load cell.\n",
    "data[description_features] = imp.fit_transform(data[description_features])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | damage_millions_dollars | \n", "
---|---|
damage_description | \n", "\n", " |
1 | \n", "0.417211 | \n", "
2 | \n", "3.078840 | \n", "
3 | \n", "13.818806 | \n", "
4 | \n", "3574.998799 | \n", "
NA | \n", "NaN | \n", "