{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from siuba import *\n", "from siuba import meta_hook\n", "\n", "import pandas as pd\n", "\n", "from pandas import DataFrame, Series" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df = DataFrame({\n", " \"repo\": [\"pandas\", \"dplyr\", \"ggplot2\", \"plotnine\"],\n", " \"owner\": [\"pandas-dev\", \"tidyverse\", \"tidyverse\", \"has2k1\"],\n", " \"language\": [\"python\", \"R\", \"R\", \"python\"],\n", " \"stars\": [17800, 2800, 3500, 1450],\n", " \"x\": [1,2,3,None]\n", " })" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## mutate" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from pandas.core.groupby import DataFrameGroupBy" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerlanguagestarsxrel_stars1
0pandaspandas-devpython178001.00
1dplyrtidyverseR28002.00
2ggplot2tidyverseR35003.0700
3plotninehas2k1python1450NaN0
\n", "
" ], "text/plain": [ " repo owner language stars x rel_stars1\n", "0 pandas pandas-dev python 17800 1.0 0\n", "1 dplyr tidyverse R 2800 2.0 0\n", "2 ggplot2 tidyverse R 3500 3.0 700\n", "3 plotnine has2k1 python 1450 NaN 0" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gdf = group_by(df, 'language', \"owner\")\n", "\n", "out = mutate(gdf, rel_stars1 = _.stars - _.stars.min())\n", "\n", "ungroup(out)\n", "\n", "df.siu_group_by(\"language\", \"owner\").siu_mutate(rel_stars1 = _.stars - _.stars.min()).siu_ungroup()\n", "\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerlanguagestarsxrel_stars1rel_stars2
0pandaspandas-devpython178001.0035600
1dplyrtidyverseR28002.005600
2ggplot2tidyverseR35003.07007000
3plotninehas2k1python1450NaN02900
\n", "
" ], "text/plain": [ " repo owner language stars x rel_stars1 rel_stars2\n", "0 pandas pandas-dev python 17800 1.0 0 35600\n", "1 dplyr tidyverse R 2800 2.0 0 5600\n", "2 ggplot2 tidyverse R 3500 3.0 700 7000\n", "3 plotnine has2k1 python 1450 NaN 0 2900" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ungroup(mutate(out, rel_stars2 = _.stars + _.stars))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerlanguagestarsxrel_stars1
0pandaspandas-devpython178001.00
1dplyrtidyverseR28002.00
2ggplot2tidyverseR35003.0700
3plotninehas2k1python1450NaN0
\n", "
" ], "text/plain": [ " repo owner language stars x rel_stars1\n", "0 pandas pandas-dev python 17800 1.0 0\n", "1 dplyr tidyverse R 2800 2.0 0\n", "2 ggplot2 tidyverse R 3500 3.0 700\n", "3 plotnine has2k1 python 1450 NaN 0" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df \\\n", " .siu_group_by(\"language\", \"owner\") \\\n", " .siu_mutate(rel_stars1 = _.stars - _.stars.min()) \\\n", " .siu_ungroup()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## filter" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerlanguagestarsx
0pandaspandas-devpython178001.0
1ggplot2tidyverseR35003.0
\n", "
" ], "text/plain": [ " repo owner language stars x\n", "0 pandas pandas-dev python 17800 1.0\n", "1 ggplot2 tidyverse R 3500 3.0" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# TODO: change name filter to query?\n", "\n", "# regular filter\n", "filter(df, _.stars > 3000, _.stars < 15000)\n", "\n", "# grouped filter\n", "gdf = group_by(df, \"language\")\n", "\n", "ungroup(filter(gdf, _.stars != _.stars.min()))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## summarize" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
languagettl_starswat
0R63002800
1python192501450
\n", "
" ], "text/plain": [ " language ttl_stars wat\n", "0 R 6300 2800\n", "1 python 19250 1450" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# summarize DataFrame\n", "summarize(df, min_stars = _.stars.min())\n", "\n", "# summarize grouped DataFrame\n", "gdf = group_by(df, \"language\")\n", "\n", "summarize(gdf, ttl_stars = _.stars.sum(), wat = _.stars.min())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## transmute" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
languagereporel_stars1
0pythonpandas16350
1Rdplyr0
2Rggplot2700
3pythonplotnine0
\n", "
" ], "text/plain": [ " language repo rel_stars1\n", "0 python pandas 16350\n", "1 R dplyr 0\n", "2 R ggplot2 700\n", "3 python plotnine 0" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "transmute(df, \"repo\", rel_stars1 = _.stars - _.stars.min())\n", "\n", "ungroup(transmute(gdf, \"repo\", rel_stars1 = _.stars - _.stars.min()))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## select" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yrepoownerstars
01.0pandaspandas-dev17800
12.0dplyrtidyverse2800
23.0ggplot2tidyverse3500
3NaNplotninehas2k11450
\n", "
" ], "text/plain": [ " y repo owner stars\n", "0 1.0 pandas pandas-dev 17800\n", "1 2.0 dplyr tidyverse 2800\n", "2 3.0 ggplot2 tidyverse 3500\n", "3 NaN plotnine has2k1 1450" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# thoughts:\n", "# + can use dynamic values, e.g. colname == .x\n", "# + if select implements some name class, then nothing magic happening\n", "# e.g. _.y == _.x is equivalent to lambda cols: cols.y == cols.x\n", "# - long winded (==, _.y seems harder to read than \"y\")\n", "# select(df, _.y == _.x, -_.language)\n", "\n", "select(df, _.y == _.x, -_.language)\n", "# considered alternative with strings. E.g...\n", "# select(df, \"y = x\", \"language\")\n", "# select(df, dict(y = \"x\"), \"language\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## arrange" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerlanguagestarsx
3plotninehas2k1python1450NaN
1dplyrtidyverseR28002.0
2ggplot2tidyverseR35003.0
0pandaspandas-devpython178001.0
\n", "
" ], "text/plain": [ " repo owner language stars x\n", "3 plotnine has2k1 python 1450 NaN\n", "1 dplyr tidyverse R 2800 2.0\n", "2 ggplot2 tidyverse R 3500 3.0\n", "0 pandas pandas-dev python 17800 1.0" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arrange(df, -_.owner, _.repo)\n", "\n", "arrange(df, _.owner.str.len())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## helpers (if_else, case_when)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['no', 'yeah', 'no', 'no'], dtype=' 10000: \"incredible!\",\n", " _.stars > 1000: \"pretty good!\",\n", " _.stars > 100 : \"keep going!\",\n", " True: \"I don't know\"\n", "})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## nest and unnest " ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
languagedata
0Rrepo owner stars x\n", "1 dplyr ...
1pythonrepo owner stars x\n", "0 panda...
\n", "
" ], "text/plain": [ " language data\n", "0 R repo owner stars x\n", "1 dplyr ...\n", "1 python repo owner stars x\n", "0 panda..." ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# data column is an array of DataFrames\n", "nest(df, -_.language, key = \"data\")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
languagerepoownerstarsx
0Rdplyrtidyverse28002.0
1Rggplot2tidyverse35003.0
2pythonpandaspandas-dev178001.0
3pythonplotninehas2k11450NaN
\n", "
" ], "text/plain": [ " language repo owner stars x\n", "0 R dplyr tidyverse 2800 2.0\n", "1 R ggplot2 tidyverse 3500 3.0\n", "2 python pandas pandas-dev 17800 1.0\n", "3 python plotnine has2k1 1450 NaN" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "unnest(nest(df, -_.language, key = \"data\"), \"data\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## count " ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
languageownern
0Rtidyverse2
1pythonhas2k11
2pythonpandas-dev1
\n", "
" ], "text/plain": [ " language owner n\n", "0 R tidyverse 2\n", "1 python has2k1 1\n", "2 python pandas-dev 1" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "count(df, \"language\", \"owner\")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerlanguagestarsxn
0pandaspandas-devpython178001.01
1dplyrtidyverseR28002.02
2ggplot2tidyverseR35003.02
3plotninehas2k1python1450NaN1
\n", "
" ], "text/plain": [ " repo owner language stars x n\n", "0 pandas pandas-dev python 17800 1.0 1\n", "1 dplyr tidyverse R 2800 2.0 2\n", "2 ggplot2 tidyverse R 3500 3.0 2\n", "3 plotnine has2k1 python 1450 NaN 1" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "add_count(df, \"language\", \"owner\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Distinct" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerlanguagestarsx
0pandaspandas-devpython178001.0
1dplyrtidyverseR28002.0
\n", "
" ], "text/plain": [ " repo owner language stars x\n", "0 pandas pandas-dev python 17800 1.0\n", "1 dplyr tidyverse R 2800 2.0" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distinct(df, _.language, _keep_all = True)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
language
0python
1R
\n", "
" ], "text/plain": [ " language\n", "0 python\n", "1 R" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distinct(df, \"language\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
lang2
0python
1r
\n", "
" ], "text/plain": [ " lang2\n", "0 python\n", "1 r" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distinct(df, lang2 = _.language.str.lower())" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
lang2
0R
1PYTHON
\n", "
" ], "text/plain": [ " lang2\n", "0 R\n", "1 PYTHON" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gdf = group_by(df, \"language\")\n", "\n", "ungroup(distinct(gdf, lang2 = _.language.str.upper()))\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Joins" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerlanguagestarsxrepo2follow
0pandaspandas-devpython178001.0pandasTrue
1dplyrtidyverseR28002.0dplyrFalse
2ggplot2tidyverseR35003.0NoneTrue
3plotninehas2k1python1450NaNNoneFalse
\n", "
" ], "text/plain": [ " repo owner language stars x repo2 follow\n", "0 pandas pandas-dev python 17800 1.0 pandas True\n", "1 dplyr tidyverse R 2800 2.0 dplyr False\n", "2 ggplot2 tidyverse R 3500 3.0 None True\n", "3 plotnine has2k1 python 1450 NaN None False" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "follow = pd.DataFrame({\n", " 'repo': ['pandas', 'dplyr', 'ggplot2', 'plotnine'],\n", " 'repo2': ['pandas', 'dplyr', None, None],\n", " 'follow': [True, False, True, False]\n", "})\n", "\n", "join(df, follow, how = \"inner\", on = \"repo\")\n", "# join(df, df)\n", "\n", "join(df, follow, how = \"inner\", on = {\"repo\": \"repo2\"})\n", "\n", "left_join(df, follow, \"repo\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Spread and Gather" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerxRpython
0dplyrtidyverse2.02800.0NaN
1ggplot2tidyverse3.03500.0NaN
2pandaspandas-dev1.0NaN17800.0
3plotninehas2k1NaNNaN1450.0
\n", "
" ], "text/plain": [ " repo owner x R python\n", "0 dplyr tidyverse 2.0 2800.0 NaN\n", "1 ggplot2 tidyverse 3.0 3500.0 NaN\n", "2 pandas pandas-dev 1.0 NaN 17800.0\n", "3 plotnine has2k1 NaN NaN 1450.0" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "df2 = spread(df, 'language', 'stars')\n", "df2\n", "# this should raise an error, because duplicate id col x key combos\n", "#spread(pd.concat([df, df]), 'language', 'stars')" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerxkeyvalue
0dplyrtidyverse2.0R2800.0
1ggplot2tidyverse3.0R3500.0
2pandaspandas-dev1.0python17800.0
3plotninehas2k1NaNpython1450.0
\n", "
" ], "text/plain": [ " repo owner x key value\n", "0 dplyr tidyverse 2.0 R 2800.0\n", "1 ggplot2 tidyverse 3.0 R 3500.0\n", "2 pandas pandas-dev 1.0 python 17800.0\n", "3 plotnine has2k1 NaN python 1450.0" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gather(df2, \"key\", \"value\", \"R\", \"python\", drop_na = True)\n", "\n", "# TODO\n", "# gather(df2, _.key, _.value, _[\"R\":\"python\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Piping" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'x is: 3'" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f = Pipeable(f = lambda x: x + 1) >> Pipeable(f = lambda x: \"x is: {}\".format(x))\n", "\n", "f(2)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repoownerlanguagestarsxnew_repocase
0pandaspandas-devpython178001.0pandas waatttaw yeah
\n", "
" ], "text/plain": [ " repo owner language stars x new_repo case\n", "0 pandas pandas-dev python 17800 1.0 pandas waattt aw yeah" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(df\n", " >> mutate(\n", " new_repo = _.repo + \" waattt\",\n", " case = case_when(_, {_.language == \"python\": \"aw yeah\", True: 'wat'})\n", " )\n", " >> filter(_.stars > 5000)\n", " )" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
languagewat
0R3150.0
1python9625.0
\n", "
" ], "text/plain": [ " language wat\n", "0 R 3150.0\n", "1 python 9625.0" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df >> group_by(_.language) >> summarize(wat = _.stars.mean())" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "222px" }, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 4 }