{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "3d8dbd18", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:22.688903Z", "iopub.status.busy": "2021-07-16T22:28:22.687968Z", "iopub.status.idle": "2021-07-16T22:28:23.757532Z", "shell.execute_reply": "2021-07-16T22:28:23.758081Z" } }, "outputs": [ { "data": { "text/html": [ "
Try this notebook on binder.
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "###
★ cur_group_id
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "##### Get the current group id\n", "\n", "##### Args:\n", "  `_data`: The dataframe \n", "\n", "##### Returns:\n", "  The current group id \n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "###
★ cur_group_rows
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "##### Get the current group row indices\n", "\n", "##### Args:\n", "  `_data`: The dataframe \n", "\n", "##### Returns:\n", "  The current group rows \n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "###
★ cur_data
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "##### Get the current dataframe\n", "\n", "##### Args:\n", "  `_data`: The dataframe \n", "\n", "##### Returns:\n", "  The current dataframe \n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "###
★ cur_data_all
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "##### Get the current data for the current group including\n", "the grouping variables \n", "\n", "##### Args:\n", "  `_data`: The dataframe \n", "\n", "##### Returns:\n", "  The current dataframe \n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "###
★ cur_column
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "##### Get the current column\n", "\n", "##### Args:\n", "  `_data`: The dataframe \n", "  `_name`: The column name \n", "\n", "##### Returns:\n", "  The current column \n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# https://dplyr.tidyverse.org/reference/context.html\n", "%run nb_helpers.py\n", "from datar.all import *\n", "\n", "nb_header(cur_group_id, cur_group_rows, cur_data, cur_data_all, cur_column, book='context')" ] }, { "cell_type": "code", "execution_count": 2, "id": "ec43ebdc", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:23.769370Z", "iopub.status.busy": "2021-07-16T22:28:23.767355Z", "iopub.status.idle": "2021-07-16T22:28:24.044934Z", "shell.execute_reply": "2021-07-16T22:28:24.045340Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gn
<object><int64>
0a1
1b2
2c3
\n", "
\n" ], "text/plain": [ " g n\n", " \n", "0 a 1\n", "1 b 2\n", "2 c 3" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = tibble(\n", " g=['a'] + ['b'] * 2 + ['c'] * 3,\n", " x=runif(6),\n", " y=runif(6)\n", ")\n", "gf = df >> group_by(f.g)\n", "\n", "gf >> summarise(n = n())" ] }, { "cell_type": "code", "execution_count": 3, "id": "d162c09b", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.060058Z", "iopub.status.busy": "2021-07-16T22:28:24.059390Z", "iopub.status.idle": "2021-07-16T22:28:24.063439Z", "shell.execute_reply": "2021-07-16T22:28:24.064000Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gxy
<object><float64><float64>
0a0.8797330.242456
1b0.7895500.165711
2b0.0738770.198040
3c0.6778260.186310
4c0.3240690.212226
5c0.5898810.990174
\n", "
\n", "

TibbleGrouped: g (n=3)" ], "text/plain": [ " g x y\n", " \n", "0 a 0.879733 0.242456\n", "1 b 0.789550 0.165711\n", "2 b 0.073877 0.198040\n", "3 c 0.677826 0.186310\n", "4 c 0.324069 0.212226\n", "5 c 0.589881 0.990174\n", "[TibbleGrouped: g (n=3)]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gf" ] }, { "cell_type": "code", "execution_count": 4, "id": "78aae172", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.093244Z", "iopub.status.busy": "2021-07-16T22:28:24.092669Z", "iopub.status.idle": "2021-07-16T22:28:24.251043Z", "shell.execute_reply": "2021-07-16T22:28:24.251600Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gxyid
<object><float64><float64><int64>
0a0.8797330.2424560
1b0.7895500.1657111
2b0.0738770.1980401
3c0.6778260.1863102
4c0.3240690.2122262
5c0.5898810.9901742
\n", "
\n", "

TibbleGrouped: g (n=3)" ], "text/plain": [ " g x y id\n", " \n", "0 a 0.879733 0.242456 0\n", "1 b 0.789550 0.165711 1\n", "2 b 0.073877 0.198040 1\n", "3 c 0.677826 0.186310 2\n", "4 c 0.324069 0.212226 2\n", "5 c 0.589881 0.990174 2\n", "[TibbleGrouped: g (n=3)]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gf >> mutate(id=cur_group_id()) " ] }, { "cell_type": "code", "execution_count": 5, "id": "49c59913", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.293049Z", "iopub.status.busy": "2021-07-16T22:28:24.292380Z", "iopub.status.idle": "2021-07-16T22:28:24.301115Z", "shell.execute_reply": "2021-07-16T22:28:24.301811Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
grow
<object><object>
0a[0]
1b[1, 2]
2c[3, 4, 5]
\n", "
\n" ], "text/plain": [ " g row\n", " \n", "0 a [0]\n", "1 b [1, 2]\n", "2 c [3, 4, 5]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gf >> summarise(row=cur_group_rows()) " ] }, { "cell_type": "code", "execution_count": 6, "id": "cb760a8a", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.335103Z", "iopub.status.busy": "2021-07-16T22:28:24.334457Z", "iopub.status.idle": "2021-07-16T22:28:24.348919Z", "shell.execute_reply": "2021-07-16T22:28:24.349292Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gdata
<object><object>
0a<DF 1x1>
1b<DF 1x1>
2c<DF 1x1>
\n", "
\n" ], "text/plain": [ " g data\n", " \n", "0 a \n", "1 b \n", "2 c " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gf_group = gf >> summarise(data=cur_group())\n", "gf_group " ] }, { "cell_type": "code", "execution_count": 7, "id": "4c25f162", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.367887Z", "iopub.status.busy": "2021-07-16T22:28:24.367244Z", "iopub.status.idle": "2021-07-16T22:28:24.381979Z", "shell.execute_reply": "2021-07-16T22:28:24.382350Z" } }, "outputs": [ { "data": { "text/plain": [ "0 \n", "1 \n", "2 \n", "Name: data, dtype: object" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gf_group >> pull(f.data)" ] }, { "cell_type": "code", "execution_count": 8, "id": "71996f23", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.447148Z", "iopub.status.busy": "2021-07-16T22:28:24.446024Z", "iopub.status.idle": "2021-07-16T22:28:24.467264Z", "shell.execute_reply": "2021-07-16T22:28:24.467729Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gdata
<object><object>
0a<DF 1x2>
1b<DF 2x2>
2c<DF 3x2>
\n", "
\n" ], "text/plain": [ " g data\n", " \n", "0 a \n", "1 b \n", "2 c " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gf_data = gf >> summarise(data=cur_data())\n", "gf_data" ] }, { "cell_type": "code", "execution_count": 9, "id": "9b37d097", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.496469Z", "iopub.status.busy": "2021-07-16T22:28:24.495880Z", "iopub.status.idle": "2021-07-16T22:28:24.515602Z", "shell.execute_reply": "2021-07-16T22:28:24.516019Z" } }, "outputs": [ { "data": { "text/plain": [ "[ x y\n", " \n", " 0 0.879733 0.242456,\n", " x y\n", " \n", " 1 0.789550 0.165711\n", " 2 0.073877 0.198040,\n", " x y\n", " \n", " 3 0.677826 0.186310\n", " 4 0.324069 0.212226\n", " 5 0.589881 0.990174]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gf_data >> pull(f.data, to='list')" ] }, { "cell_type": "code", "execution_count": 10, "id": "03c07299", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.549672Z", "iopub.status.busy": "2021-07-16T22:28:24.548821Z", "iopub.status.idle": "2021-07-16T22:28:24.555222Z", "shell.execute_reply": "2021-07-16T22:28:24.555691Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gdata
<object><object>
0a<DF 1x3>
1b<DF 2x3>
2c<DF 3x3>
\n", "
\n" ], "text/plain": [ " g data\n", " \n", "0 a \n", "1 b \n", "2 c " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gf_data_all = gf >> summarise(data=cur_data_all())\n", "gf_data_all" ] }, { "cell_type": "code", "execution_count": 11, "id": "4cc41680", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.571425Z", "iopub.status.busy": "2021-07-16T22:28:24.570212Z", "iopub.status.idle": "2021-07-16T22:28:24.578675Z", "shell.execute_reply": "2021-07-16T22:28:24.579169Z" } }, "outputs": [ { "data": { "text/plain": [ "[ g x y\n", " \n", " 0 a 0.879733 0.242456,\n", " g x y\n", " \n", " 1 b 0.789550 0.165711\n", " 2 b 0.073877 0.198040,\n", " g x y\n", " \n", " 3 c 0.677826 0.186310\n", " 4 c 0.324069 0.212226\n", " 5 c 0.589881 0.990174]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gf_data_all >> pull(f.data, to='list')" ] }, { "cell_type": "code", "execution_count": 12, "id": "fe4097d2", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.595194Z", "iopub.status.busy": "2021-07-16T22:28:24.593255Z", "iopub.status.idle": "2021-07-16T22:28:24.671560Z", "shell.execute_reply": "2021-07-16T22:28:24.672013Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
<object><object>
0x 0.7739296633011361y 0.05878489331508395
1x 0.6233885082054422y 0.027460112154048803
2x 0.005457753705443728y 0.03921965587769912
3x 0.45944873370090106y 0.034711398724083777
4x 0.10502100613889181y 0.04504004423820979
5x 0.3479600358358678y 0.9804449881028017
\n", "
\n" ], "text/plain": [ " x y\n", " \n", "0 x 0.7739296633011361 y 0.05878489331508395\n", "1 x 0.6233885082054422 y 0.027460112154048803\n", "2 x 0.005457753705443728 y 0.03921965587769912\n", "3 x 0.45944873370090106 y 0.034711398724083777\n", "4 x 0.10502100613889181 y 0.04504004423820979\n", "5 x 0.3479600358358678 y 0.9804449881028017" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df >> select(f.x, f.y) >> mutate(\n", " across(\n", " everything(), \n", " lambda x, cc: [cc + ' '] * x.shape[0] + (x**2).astype(str), cc=cur_column()\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "id": "c99b8dee", "metadata": { "execution": { "iopub.execute_input": "2021-07-16T22:28:24.695778Z", "iopub.status.busy": "2021-07-16T22:28:24.695126Z", "iopub.status.idle": "2021-07-16T22:28:24.699137Z", "shell.execute_reply": "2021-07-16T22:28:24.699704Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gxy
<object><object><object>
0ax 0.7739296633011361y 0.05878489331508395
1bx 0.6233885082054422y 0.027460112154048803
2bx 0.005457753705443728y 0.03921965587769912
3cx 0.45944873370090106y 0.034711398724083777
4cx 0.10502100613889181y 0.04504004423820979
5cx 0.3479600358358678y 0.9804449881028017
\n", "
\n" ], "text/plain": [ " g x y\n", " \n", "0 a x 0.7739296633011361 y 0.05878489331508395\n", "1 b x 0.6233885082054422 y 0.027460112154048803\n", "2 b x 0.005457753705443728 y 0.03921965587769912\n", "3 c x 0.45944873370090106 y 0.034711398724083777\n", "4 c x 0.10502100613889181 y 0.04504004423820979\n", "5 c x 0.3479600358358678 y 0.9804449881028017" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Alternatively, skip cur_column(): each column is passed to the lambda as a\n", "# Series, so x.name already holds the column name.\n", "df >> mutate(across(\n", " [f.x, f.y], \n", " lambda x: [x.name + ' '] * x.shape[0] + (x**2).astype(str)\n", "))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.5 ('base')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" }, "vscode": { "interpreter": { "hash": "9ed5c94d10bf621c6841991b7e31ffd0f3c8de8ec4167710459737a50edc58e4" } } }, "nbformat": 4, "nbformat_minor": 5 }