{ "cells": [ { "cell_type": "markdown", "metadata": { "toc": true }, "source": [ "

Table of Contents

\n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Description\n", "This example tutorial shows how to use my module `bhishan` for the plotly\n", "extension for pandas.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2020-09-28T22:43:01.670967Z", "start_time": "2020-09-28T22:42:59.893542Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Bhishan Poudel 2020-09-28 \n", "\n", "CPython 3.7.7\n", "IPython 7.18.1\n", "\n", "compiler : Clang 4.0.1 (tags/RELEASE_401/final)\n", "system : Darwin\n", "release : 19.6.0\n", "machine : x86_64\n", "processor : i386\n", "CPU cores : 4\n", "interpreter: 64bit\n", "pandas 1.1.0\n", "bhishan 0.3.1\n", "autopep8 1.5.2\n", "seaborn 0.11.0\n", "json 2.0.9\n", "numpy 1.18.4\n", "\n" ] } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import bhishan\n", "from bhishan import bp\n", "import matplotlib.pyplot as plt\n", "\n", "%load_ext autoreload\n", "%load_ext watermark\n", "\n", "%autoreload 2\n", "%watermark -a \"Bhishan Poudel\" -d -v -m\n", "%watermark -iv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Using plotly api in module bhishan" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-09-28T22:43:01.731046Z", "start_time": "2020-09-28T22:43:01.673828Z" } }, "outputs": [], "source": [ "# print(sns.get_dataset_names())" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-09-28T22:43:01.818206Z", "start_time": "2020-09-28T22:43:01.736379Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivedpclasssexagesibspparchfareembarkedclasswhoadult_maledeckembark_townalivealone
003male22.0107.2500SThirdmanTrueNaNSouthamptonnoFalse
111female38.01071.2833CFirstwomanFalseCCherbourgyesFalse
213female26.0007.9250SThirdwomanFalseNaNSouthamptonyesTrue
311female35.01053.1000SFirstwomanFalseCSouthamptonyesFalse
403male35.0008.0500SThirdmanTrueNaNSouthamptonnoTrue
\n", "
" ], "text/plain": [ " survived pclass sex age sibsp parch fare embarked class \\\n", "0 0 3 male 22.0 1 0 7.2500 S Third \n", "1 1 1 female 38.0 1 0 71.2833 C First \n", "2 1 3 female 26.0 0 0 7.9250 S Third \n", "3 1 1 female 35.0 1 0 53.1000 S First \n", "4 0 3 male 35.0 0 0 8.0500 S Third \n", "\n", " who adult_male deck embark_town alive alone \n", "0 man True NaN Southampton no False \n", "1 woman False C Cherbourg yes False \n", "2 woman False NaN Southampton yes True \n", "3 woman False C Southampton yes False \n", "4 man True NaN Southampton no True " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = sns.load_dataset('titanic')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2020-09-28T22:43:01.942011Z", "start_time": "2020-09-28T22:43:01.821554Z" } }, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
embarked Count Percent Cumulative Count Cumulative Percent
0S64472.44%64472.44%
1C16818.90%81291.34%
2Q778.66%889100.00%
" ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.bp.freq(['embarked'],style=True)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2020-09-28T22:43:02.047957Z", "start_time": "2020-09-28T22:43:01.944609Z" } }, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Feature Type N Count Unique Missing MissingPct Zeros ZerosPct mean std min max 25% 50% 75%
11deckcategory891203768877.2200.00
3agefloat648917148817719.8700.0029.7014.530.4280.0020.1228.0038.00
7embarkedobject891889320.2200.00
12embark_townobject891889320.2200.00
5parchint64891891700.0067876.090.380.810.006.000.000.000.00
4sibspint64891891700.0060868.240.521.100.008.000.000.001.00
0survivedint64891891200.0054961.620.380.490.001.000.000.001.00
10adult_malebool891891200.0035439.73
14alonebool891891200.0035439.73
6farefloat6489189124800.00151.6832.2049.690.00512.337.9114.4531.00
1pclassint64891891300.0000.002.310.841.003.002.003.003.00
2sexobject891891200.0000.00
8classcategory891891300.0000.00
9whoobject891891300.0000.00
13aliveobject891891200.0000.00
" ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.bp.describe()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2020-09-28T22:43:02.097979Z", "start_time": "2020-09-28T22:43:02.049860Z" } }, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.bp.get_duplicate_columns()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2020-09-28T22:43:02.137152Z", "start_time": "2020-09-28T22:43:02.099799Z" } }, "outputs": [ { "data": { "text/plain": [ "survived int64\n", "pclass int64\n", "sex object\n", "age float64\n", "sibsp int64\n", "parch int64\n", "fare float64\n", "embarked object\n", "class object\n", "who object\n", "adult_male bool\n", "deck object\n", "embark_town object\n", "alive object\n", "alone bool\n", "dtype: object" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dtypes" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2020-09-28T22:43:02.185925Z", "start_time": "2020-09-28T22:43:02.139244Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
aba_dup
0010
1121
2232
\n", "
" ], "text/plain": [ " a b a_dup\n", "0 0 1 0\n", "1 1 2 1\n", "2 2 3 2" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = pd.DataFrame({'a': range(3),'b':range(1,4),'a_dup':range(3)})\n", "df1" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2020-09-28T22:43:02.235248Z", "start_time": "2020-09-28T22:43:02.190994Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "a == a_dup\n" ] }, { "data": { "text/plain": [ "['a_dup']" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.bp.get_duplicate_columns()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2020-09-28T22:43:02.807256Z", "start_time": "2020-09-28T22:43:02.239121Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Missing values high threshold = 80%\n", "\n", "Number of missing values features: 4\n", "cols_missing_high = []\n", "cols_missing_low = ['deck', 'age', 'embarked', 'embark_town']\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Feature Type Count Missing Zeros Unique MissingPct ZerosPct count mean std min 25% 50% 75% max
11deckobject8916880777.2166110.000000
3agefloat6489117708819.8653200.000000714.00000029.69911814.5264970.42000020.12500028.00000038.00000080.000000
7embarkedobject8912030.2244670.000000
12embark_townobject8912030.2244670.000000
" ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.bp.missing()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python (dataSc)", "language": "python", "name": "datasc" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": {}, "toc_section_display": true, "toc_window_display": true }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }