{ "cells": [ { "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.02784, "end_time": "2021-01-22T16:44:11.041130", "exception": false, "start_time": "2021-01-22T16:44:11.013290", "status": "completed" }, "tags": [] }, "source": [ "# Nobel Prize Exploratory Data Analysis with Lets-Plot\n", "\n", "The data is provided by [Kaggle](https://www.kaggle.com/bahramjannesarr/nobel-prize-from-1901-till-2020)." ] }, { "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.026124, "end_time": "2021-01-22T16:44:11.094094", "exception": false, "start_time": "2021-01-22T16:44:11.067970", "status": "completed" }, "tags": [] }, "source": [ "## Preparation" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:40:20.411988Z", "iopub.status.busy": "2024-04-17T07:40:20.411865Z", "iopub.status.idle": "2024-04-17T07:40:21.115047Z", "shell.execute_reply": "2024-04-17T07:40:21.114552Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: colorcet in /home/asmirnov/Applications/miniconda3/envs/lets-plot-docs/lib/python3.10/site-packages (3.1.0)\r\n" ] } ], "source": [ "from sys import executable\n", "!{executable} -m pip install colorcet" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:40:21.116768Z", "iopub.status.busy": "2024-04-17T07:40:21.116673Z", "iopub.status.idle": "2024-04-17T07:40:21.507658Z", "shell.execute_reply": "2024-04-17T07:40:21.507350Z" }, "papermill": { "duration": 1.276394, "end_time": "2021-01-22T16:44:24.991593", "exception": false, "start_time": "2021-01-22T16:44:23.715199", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).\n" ] } ], "source": [ "import numpy as np\n", "import 
pandas as pd\n", "import colorcet as cc\n", "\n", "from lets_plot import *\n", "from lets_plot.geo_data import *" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:40:21.509038Z", "iopub.status.busy": "2024-04-17T07:40:21.508922Z", "iopub.status.idle": "2024-04-17T07:40:21.510889Z", "shell.execute_reply": "2024-04-17T07:40:21.510721Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:40:21.512076Z", "iopub.status.busy": "2024-04-17T07:40:21.511929Z", "iopub.status.idle": "2024-04-17T07:40:21.513580Z", "shell.execute_reply": "2024-04-17T07:40:21.513397Z" } }, "outputs": [], "source": [ "def continuous_color_scale(name=None):\n", "    \"\"\"Sequential 'Blues' brewer scale applied to the 'paint_a' aesthetic.\n", "\n", "    `name` is forwarded unchanged to `scale_brewer` (presumably the legend\n", "    title - confirm against the lets-plot API).\n", "    \"\"\"\n", "    return scale_brewer('paint_a', type='seq', palette='Blues', name=name)\n", "\n", "\n", "def discrete_color_scale(name=None):\n", "    \"\"\"Qualitative 'Set2' brewer scale applied to the 'paint_a' aesthetic.\n", "\n", "    `name` is forwarded unchanged to `scale_brewer` (presumably the legend\n", "    title - confirm against the lets-plot API).\n", "    \"\"\"\n", "    return scale_brewer('paint_a', type='qual', palette='Set2', name=name)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:40:21.514606Z", "iopub.status.busy": "2024-04-17T07:40:21.514488Z", "iopub.status.idle": "2024-04-17T07:40:21.516013Z", "shell.execute_reply": "2024-04-17T07:40:21.515832Z" }, "papermill": { "duration": 0.048815, "end_time": "2021-01-22T16:44:25.078209", "exception": false, "start_time": "2021-01-22T16:44:25.029394", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def get_counts_df(local_df, *, column, column_name=None):\n", "    \"\"\"Count how often each value occurs in `local_df[column]`.\n", "\n", "    Returns a new two-column frame: the distinct values, labelled\n", "    `column_name` (or `column` when `column_name` is falsy), next to their\n", "    'count', in the order produced by `value_counts()`.\n", "    \"\"\"\n", "    counts = local_df[column].value_counts().to_frame('count')\n", "    # Label the value index before it is turned into a regular column.\n", "    counts.index.name = column_name or column\n", "    return counts.reset_index()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:40:21.517012Z", "iopub.status.busy": "2024-04-17T07:40:21.516895Z", "iopub.status.idle": "2024-04-17T07:40:22.400464Z", "shell.execute_reply": "2024-04-17T07:40:22.400146Z" }, "papermill": { "duration": 12.567732, "end_time": "2021-01-22T16:44:37.683148", "exception": false, "start_time": "2021-01-22T16:44:25.115416", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "\n", " | firstname | \n", "surname | \n", "born_country | \n", 
"died_country | \n", "gender | \n", "year | \n", "category | \n", "prize_share | \n", "name_of_university | \n", "city_of_university | \n", "country_of_university | \n", "born_month | \n", "age | \n", "age_get_prize | \n", "decade | \n", "fullname | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Wilhelm Conrad | \n", "Röntgen | \n", "Germany | \n", "Germany | \n", "male | \n", "1901 | \n", "physics | \n", "1 | \n", "Munich University | \n", "Munich | \n", "Germany | \n", "Mar | \n", "78 | \n", "56 | \n", "1900 | \n", "Wilhelm Conrad Röntgen | \n", "
1 | \n", "Hendrik A. | \n", "Lorentz | \n", "Netherlands | \n", "Netherlands | \n", "male | \n", "1902 | \n", "physics | \n", "2 | \n", "Leiden University | \n", "Leiden | \n", "NaN | \n", "Jul | \n", "75 | \n", "49 | \n", "1900 | \n", "Hendrik A. Lorentz | \n", "
2 | \n", "Pieter | \n", "Zeeman | \n", "Netherlands | \n", "Netherlands | \n", "male | \n", "1902 | \n", "physics | \n", "2 | \n", "Amsterdam University | \n", "Amsterdam | \n", "NaN | \n", "May | \n", "78 | \n", "37 | \n", "1900 | \n", "Pieter Zeeman | \n", "
3 | \n", "Henri | \n", "Becquerel | \n", "France | \n", "France | \n", "male | \n", "1903 | \n", "physics | \n", "2 | \n", "École Polytechnique | \n", "Paris | \n", "France | \n", "Dec | \n", "56 | \n", "51 | \n", "1900 | \n", "Henri Becquerel | \n", "
4 | \n", "Pierre | \n", "Curie | \n", "France | \n", "France | \n", "male | \n", "1903 | \n", "physics | \n", "4 | \n", "École municipale de physique et de chimie indu... | \n", "Paris | \n", "France | \n", "May | \n", "47 | \n", "44 | \n", "1900 | \n", "Pierre Curie | \n", "