{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Malnutrition in the World\n", "\n", "Data comes from [here](https://www.kaggle.com/ruchi798/malnutrition-across-the-globe)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:44:41.106971Z", "iopub.status.busy": "2024-08-23T10:44:41.106835Z", "iopub.status.idle": "2024-08-23T10:44:41.657814Z", "shell.execute_reply": "2024-08-23T10:44:41.657369Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.model_selection import GridSearchCV\n", "\n", "from lets_plot import *\n", "from lets_plot.bistro.corr import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:44:41.659504Z", "iopub.status.busy": "2024-08-23T10:44:41.659390Z", "iopub.status.idle": "2024-08-23T10:44:41.661636Z", "shell.execute_reply": "2024-08-23T10:44:41.661394Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:44:41.674863Z", "iopub.status.busy": "2024-08-23T10:44:41.674748Z", "iopub.status.idle": "2024-08-23T10:44:42.123688Z", "shell.execute_reply": "2024-08-23T10:44:42.123246Z" } }, "outputs": [], "source": [ "cwa_df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/\"\n", " \"master/data/malnutrition/country_wise_average.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:44:42.125297Z", "iopub.status.busy": "2024-08-23T10:44:42.125220Z", "iopub.status.idle": "2024-08-23T10:44:42.779350Z", "shell.execute_reply": "2024-08-23T10:44:42.778707Z" } }, "outputs": [], "source": [ "me_df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/\"\n", " \"master/data/malnutrition/malnutrition_estimates.csv\")\n", "# Sort newest-first so that drop_duplicates (which keeps the first row it sees)\n", "# retains the latest-year record for each country.\n", "me_df = me_df.sort_values(by='Year', ascending=False).drop_duplicates(subset='Country')\n", "me_df = me_df[['Country', 'ISO code', 'LDC', 'LIFD', 'LLDC or SID2']]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:44:42.781155Z", "iopub.status.busy": "2024-08-23T10:44:42.780943Z", "iopub.status.idle": "2024-08-23T10:44:42.784210Z", "shell.execute_reply": "2024-08-23T10:44:42.783943Z" } }, "outputs": [], "source": [ "# Inner join: only countries present in both tables are kept.\n", "df = cwa_df.merge(me_df, on='Country')\n", "# .str.capitalize() yields the same text as str.capitalize() per value, but it is\n", "# vectorised and propagates a missing country name instead of raising AttributeError.\n", "df['Country'] = df['Country'].str.capitalize()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### A Bit of Correlation Analysis\n", "\n", "Let's look at the correlation coefficients of random variables that correspond to dataframe columns." 
] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:44:42.785414Z", "iopub.status.busy": "2024-08-23T10:44:42.785336Z", "iopub.status.idle": "2024-08-23T10:44:42.792858Z", "shell.execute_reply": "2024-08-23T10:44:42.792612Z" } }, "outputs": [ { "data": { "text/html": [ " \n", " " ], "text/plain": [ "\n", " | name | \n", "iso_a3 | \n", "continent | \n", "pop_est | \n", "gdp_md | \n", "geometry | \n", "
---|---|---|---|---|---|---|
0 | \n", "Fiji | \n", "FJI | \n", "Oceania | \n", "889953.0 | \n", "5496 | \n", "MULTIPOLYGON (((180.00000 -16.06713, 180.00000... | \n", "
1 | \n", "Tanzania | \n", "TZA | \n", "Africa | \n", "58005463.0 | \n", "63177 | \n", "POLYGON ((33.90371 -0.95000, 34.07262 -1.05982... | \n", "
2 | \n", "W. Sahara | \n", "ESH | \n", "Africa | \n", "603253.0 | \n", "907 | \n", "POLYGON ((-8.66559 27.65643, -8.66512 27.58948... | \n", "
3 | \n", "Canada | \n", "CAN | \n", "North America | \n", "37589262.0 | \n", "1736425 | \n", "MULTIPOLYGON (((-122.84000 49.00000, -122.9742... | \n", "
4 | \n", "United States of America | \n", "USA | \n", "North America | \n", "328239523.0 | \n", "21433226 | \n", "MULTIPOLYGON (((-122.84000 49.00000, -120.0000... | \n", "