{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "This is a Notebook for the blog post about merging columns in R (https://www.marsja.se/how-to-concatenate-two-columns-or-more-in-r-stringr-tidyr/). \n", "\n", "\n", "## Importing Data\n", "First, download the data here and place it in the same directory as this notebook (or change the path to where you have it)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Example Data:\n", "library(readxl)\n", "dataf <- read_excel(\"combine_columns_in_R.xlsx\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### See the Structure of the Data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Classes 'tbl_df', 'tbl' and 'data.frame':\t7 obs. of 5 variables:\n", " $ Date : num 15 11 11 10 14 10 12\n", " $ Month: chr \"Jun\" \"Jul\" \"Aug\" \"Sep\" ...\n", " $ Year : num 2015 2016 2017 2018 2019 ...\n", " $ Snake: chr \"Python\" \"Boa\" \"Python\" \"Boa\" ...\n", " $ Size : chr \"Small\" \"Large\" \"Medium\" \"Large\" ...\n" ] } ], "source": [ "str(dataf)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### First 5 Row of the Data\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
DateMonthYearSnakeSize
15 Jun 2015 PythonSmall
11 Jul 2016 Boa Large
11 Aug 2017 PythonMedium
10 Sep 2018 Boa Large
14 Oct 2019 PythonSmall
\n" ], "text/latex": [ "\\begin{tabular}{r|lllll}\n", " Date & Month & Year & Snake & Size\\\\\n", "\\hline\n", "\t 15 & Jun & 2015 & Python & Small \\\\\n", "\t 11 & Jul & 2016 & Boa & Large \\\\\n", "\t 11 & Aug & 2017 & Python & Medium\\\\\n", "\t 10 & Sep & 2018 & Boa & Large \\\\\n", "\t 14 & Oct & 2019 & Python & Small \\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "| Date | Month | Year | Snake | Size |\n", "|---|---|---|---|---|\n", "| 15 | Jun | 2015 | Python | Small |\n", "| 11 | Jul | 2016 | Boa | Large |\n", "| 11 | Aug | 2017 | Python | Medium |\n", "| 10 | Sep | 2018 | Boa | Large |\n", "| 14 | Oct | 2019 | Python | Small |\n", "\n" ], "text/plain": [ " Date Month Year Snake Size \n", "1 15 Jun 2015 Python Small \n", "2 11 Jul 2016 Boa Large \n", "3 11 Aug 2017 Python Medium\n", "4 10 Sep 2018 Boa Large \n", "5 14 Oct 2019 Python Small " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "head(dataf, 5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example 1: Concatenating With Paste()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dataf$MY <- paste(dataf$Month, dataf$Year)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example 2: Concatenate Two Columns with Hyphen as Separator" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
DateMonthYearSnakeSizeMY
15 Jun 2015 Python Small Jun - 2015
11 Jul 2016 Boa Large Jul - 2016
11 Aug 2017 Python Medium Aug - 2017
10 Sep 2018 Boa Large Sep - 2018
14 Oct 2019 Python Small Oct - 2019
10 Nov 2020 Python Medium Nov - 2020
12 Dec 2021 Boa Medium Dec - 2021
\n" ], "text/latex": [ "\\begin{tabular}{r|llllll}\n", " Date & Month & Year & Snake & Size & MY\\\\\n", "\\hline\n", "\t 15 & Jun & 2015 & Python & Small & Jun - 2015\\\\\n", "\t 11 & Jul & 2016 & Boa & Large & Jul - 2016\\\\\n", "\t 11 & Aug & 2017 & Python & Medium & Aug - 2017\\\\\n", "\t 10 & Sep & 2018 & Boa & Large & Sep - 2018\\\\\n", "\t 14 & Oct & 2019 & Python & Small & Oct - 2019\\\\\n", "\t 10 & Nov & 2020 & Python & Medium & Nov - 2020\\\\\n", "\t 12 & Dec & 2021 & Boa & Medium & Dec - 2021\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "| Date | Month | Year | Snake | Size | MY |\n", "|---|---|---|---|---|---|\n", "| 15 | Jun | 2015 | Python | Small | Jun - 2015 |\n", "| 11 | Jul | 2016 | Boa | Large | Jul - 2016 |\n", "| 11 | Aug | 2017 | Python | Medium | Aug - 2017 |\n", "| 10 | Sep | 2018 | Boa | Large | Sep - 2018 |\n", "| 14 | Oct | 2019 | Python | Small | Oct - 2019 |\n", "| 10 | Nov | 2020 | Python | Medium | Nov - 2020 |\n", "| 12 | Dec | 2021 | Boa | Medium | Dec - 2021 |\n", "\n" ], "text/plain": [ " Date Month Year Snake Size MY \n", "1 15 Jun 2015 Python Small Jun - 2015\n", "2 11 Jul 2016 Boa Large Jul - 2016\n", "3 11 Aug 2017 Python Medium Aug - 2017\n", "4 10 Sep 2018 Boa Large Sep - 2018\n", "5 14 Oct 2019 Python Small Oct - 2019\n", "6 10 Nov 2020 Python Medium Nov - 2020\n", "7 12 Dec 2021 Boa Medium Dec - 2021" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dataf$MY <- paste(dataf$Month, \"-\", dataf$Year)\n", "\n", "\n", "dataf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Alternative: \n", "If we want to have no whitespaces between values/characters" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
DateMonthYearSnakeSizeMY
15 Jun 2015 Python Small Jun-2015
11 Jul 2016 Boa Large Jul-2016
11 Aug 2017 Python Medium Aug-2017
10 Sep 2018 Boa Large Sep-2018
14 Oct 2019 Python Small Oct-2019
10 Nov 2020 Python Medium Nov-2020
12 Dec 2021 Boa Medium Dec-2021
\n" ], "text/latex": [ "\\begin{tabular}{r|llllll}\n", " Date & Month & Year & Snake & Size & MY\\\\\n", "\\hline\n", "\t 15 & Jun & 2015 & Python & Small & Jun-2015\\\\\n", "\t 11 & Jul & 2016 & Boa & Large & Jul-2016\\\\\n", "\t 11 & Aug & 2017 & Python & Medium & Aug-2017\\\\\n", "\t 10 & Sep & 2018 & Boa & Large & Sep-2018\\\\\n", "\t 14 & Oct & 2019 & Python & Small & Oct-2019\\\\\n", "\t 10 & Nov & 2020 & Python & Medium & Nov-2020\\\\\n", "\t 12 & Dec & 2021 & Boa & Medium & Dec-2021\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "| Date | Month | Year | Snake | Size | MY |\n", "|---|---|---|---|---|---|\n", "| 15 | Jun | 2015 | Python | Small | Jun-2015 |\n", "| 11 | Jul | 2016 | Boa | Large | Jul-2016 |\n", "| 11 | Aug | 2017 | Python | Medium | Aug-2017 |\n", "| 10 | Sep | 2018 | Boa | Large | Sep-2018 |\n", "| 14 | Oct | 2019 | Python | Small | Oct-2019 |\n", "| 10 | Nov | 2020 | Python | Medium | Nov-2020 |\n", "| 12 | Dec | 2021 | Boa | Medium | Dec-2021 |\n", "\n" ], "text/plain": [ " Date Month Year Snake Size MY \n", "1 15 Jun 2015 Python Small Jun-2015\n", "2 11 Jul 2016 Boa Large Jul-2016\n", "3 11 Aug 2017 Python Medium Aug-2017\n", "4 10 Sep 2018 Boa Large Sep-2018\n", "5 14 Oct 2019 Python Small Oct-2019\n", "6 10 Nov 2020 Python Medium Nov-2020\n", "7 12 Dec 2021 Boa Medium Dec-2021" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dataf$MY <- paste(dataf$Month, dataf$Year, sep= \"-\")\n", "dataf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example 3: Multiple Columns" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "
DateMonthYearSnakeSizeMYDMY
15 Jun 2015 Python Small Jun-2015 15 Jun 2015
11 Jul 2016 Boa Large Jul-2016 11 Jul 2016
\n" ], "text/latex": [ "\\begin{tabular}{r|lllllll}\n", " Date & Month & Year & Snake & Size & MY & DMY\\\\\n", "\\hline\n", "\t 15 & Jun & 2015 & Python & Small & Jun-2015 & 15 Jun 2015\\\\\n", "\t 11 & Jul & 2016 & Boa & Large & Jul-2016 & 11 Jul 2016\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "| Date | Month | Year | Snake | Size | MY | DMY |\n", "|---|---|---|---|---|---|---|\n", "| 15 | Jun | 2015 | Python | Small | Jun-2015 | 15 Jun 2015 |\n", "| 11 | Jul | 2016 | Boa | Large | Jul-2016 | 11 Jul 2016 |\n", "\n" ], "text/plain": [ " Date Month Year Snake Size MY DMY \n", "1 15 Jun 2015 Python Small Jun-2015 15 Jun 2015\n", "2 11 Jul 2016 Boa Large Jul-2016 11 Jul 2016" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dataf$DMY <- paste(dataf$Date, dataf$Month, dataf$Year)\n", "\n", "head(dataf, 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example 4: Using str_c()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
DateMonthYearSnakeSizeMYDMYSnakeNSize
15 Jun 2015 Python Small Jun-2015 15 Jun 2015 Python Small
11 Jul 2016 Boa Large Jul-2016 11 Jul 2016 Boa Large
11 Aug 2017 Python Medium Aug-2017 11 Aug 2017 Python Medium
10 Sep 2018 Boa Large Sep-2018 10 Sep 2018 Boa Large
14 Oct 2019 Python Small Oct-2019 14 Oct 2019 Python Small
10 Nov 2020 Python Medium Nov-2020 10 Nov 2020 Python Medium
\n" ], "text/latex": [ "\\begin{tabular}{r|llllllll}\n", " Date & Month & Year & Snake & Size & MY & DMY & SnakeNSize\\\\\n", "\\hline\n", "\t 15 & Jun & 2015 & Python & Small & Jun-2015 & 15 Jun 2015 & Python Small \\\\\n", "\t 11 & Jul & 2016 & Boa & Large & Jul-2016 & 11 Jul 2016 & Boa Large \\\\\n", "\t 11 & Aug & 2017 & Python & Medium & Aug-2017 & 11 Aug 2017 & Python Medium\\\\\n", "\t 10 & Sep & 2018 & Boa & Large & Sep-2018 & 10 Sep 2018 & Boa Large \\\\\n", "\t 14 & Oct & 2019 & Python & Small & Oct-2019 & 14 Oct 2019 & Python Small \\\\\n", "\t 10 & Nov & 2020 & Python & Medium & Nov-2020 & 10 Nov 2020 & Python Medium\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "| Date | Month | Year | Snake | Size | MY | DMY | SnakeNSize |\n", "|---|---|---|---|---|---|---|---|\n", "| 15 | Jun | 2015 | Python | Small | Jun-2015 | 15 Jun 2015 | Python Small |\n", "| 11 | Jul | 2016 | Boa | Large | Jul-2016 | 11 Jul 2016 | Boa Large |\n", "| 11 | Aug | 2017 | Python | Medium | Aug-2017 | 11 Aug 2017 | Python Medium |\n", "| 10 | Sep | 2018 | Boa | Large | Sep-2018 | 10 Sep 2018 | Boa Large |\n", "| 14 | Oct | 2019 | Python | Small | Oct-2019 | 14 Oct 2019 | Python Small |\n", "| 10 | Nov | 2020 | Python | Medium | Nov-2020 | 10 Nov 2020 | Python Medium |\n", "\n" ], "text/plain": [ " Date Month Year Snake Size MY DMY SnakeNSize \n", "1 15 Jun 2015 Python Small Jun-2015 15 Jun 2015 Python Small \n", "2 11 Jul 2016 Boa Large Jul-2016 11 Jul 2016 Boa Large \n", "3 11 Aug 2017 Python Medium Aug-2017 11 Aug 2017 Python Medium\n", "4 10 Sep 2018 Boa Large Sep-2018 10 Sep 2018 Boa Large \n", "5 14 Oct 2019 Python Small Oct-2019 14 Oct 2019 Python Small \n", "6 10 Nov 2020 Python Medium Nov-2020 10 Nov 2020 Python Medium" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "library(stringr)\n", "\n", "dataf$SnakeNSize <- str_c(dataf$Snake,\" \", dataf$Size)\n", "\n", "head(dataf)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example 5: Using Tidyr and unite()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Registered S3 methods overwritten by 'ggplot2':\n", " method from \n", " [.quosures rlang\n", " c.quosures rlang\n", " print.quosures rlang\n", "Registered S3 method overwritten by 'rvest':\n", " method from\n", " read_xml.response xml2\n", "-- Attaching packages --------------------------------------- tidyverse 1.2.1 --\n", "v ggplot2 3.1.1 v readr 1.3.1 \n", "v tibble 2.1.1 v purrr 0.3.2 \n", "v tidyr 0.8.3 v dplyr 0.8.0.1\n", "v ggplot2 3.1.1 v forcats 0.4.0 \n", "-- Conflicts ------------------------------------------ tidyverse_conflicts() --\n", "x dplyr::filter() masks stats::filter()\n", "x dplyr::lag() masks stats::lag()\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
DMYearSnakeSizeMYDMYSnakeNSize
15_Jun 2015 Python Small Jun-2015 15 Jun 2015 Python Small
11_Jul 2016 Boa Large Jul-2016 11 Jul 2016 Boa Large
11_Aug 2017 Python Medium Aug-2017 11 Aug 2017 Python Medium
10_Sep 2018 Boa Large Sep-2018 10 Sep 2018 Boa Large
14_Oct 2019 Python Small Oct-2019 14 Oct 2019 Python Small
10_Nov 2020 Python Medium Nov-2020 10 Nov 2020 Python Medium
12_Dec 2021 Boa Medium Dec-2021 12 Dec 2021 Boa Medium
\n" ], "text/latex": [ "\\begin{tabular}{r|lllllll}\n", " DM & Year & Snake & Size & MY & DMY & SnakeNSize\\\\\n", "\\hline\n", "\t 15\\_Jun & 2015 & Python & Small & Jun-2015 & 15 Jun 2015 & Python Small \\\\\n", "\t 11\\_Jul & 2016 & Boa & Large & Jul-2016 & 11 Jul 2016 & Boa Large \\\\\n", "\t 11\\_Aug & 2017 & Python & Medium & Aug-2017 & 11 Aug 2017 & Python Medium\\\\\n", "\t 10\\_Sep & 2018 & Boa & Large & Sep-2018 & 10 Sep 2018 & Boa Large \\\\\n", "\t 14\\_Oct & 2019 & Python & Small & Oct-2019 & 14 Oct 2019 & Python Small \\\\\n", "\t 10\\_Nov & 2020 & Python & Medium & Nov-2020 & 10 Nov 2020 & Python Medium\\\\\n", "\t 12\\_Dec & 2021 & Boa & Medium & Dec-2021 & 12 Dec 2021 & Boa Medium \\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "| DM | Year | Snake | Size | MY | DMY | SnakeNSize |\n", "|---|---|---|---|---|---|---|\n", "| 15_Jun | 2015 | Python | Small | Jun-2015 | 15 Jun 2015 | Python Small |\n", "| 11_Jul | 2016 | Boa | Large | Jul-2016 | 11 Jul 2016 | Boa Large |\n", "| 11_Aug | 2017 | Python | Medium | Aug-2017 | 11 Aug 2017 | Python Medium |\n", "| 10_Sep | 2018 | Boa | Large | Sep-2018 | 10 Sep 2018 | Boa Large |\n", "| 14_Oct | 2019 | Python | Small | Oct-2019 | 14 Oct 2019 | Python Small |\n", "| 10_Nov | 2020 | Python | Medium | Nov-2020 | 10 Nov 2020 | Python Medium |\n", "| 12_Dec | 2021 | Boa | Medium | Dec-2021 | 12 Dec 2021 | Boa Medium |\n", "\n" ], "text/plain": [ " DM Year Snake Size MY DMY SnakeNSize \n", "1 15_Jun 2015 Python Small Jun-2015 15 Jun 2015 Python Small \n", "2 11_Jul 2016 Boa Large Jul-2016 11 Jul 2016 Boa Large \n", "3 11_Aug 2017 Python Medium Aug-2017 11 Aug 2017 Python Medium\n", "4 10_Sep 2018 Boa Large Sep-2018 10 Sep 2018 Boa Large \n", "5 14_Oct 2019 Python Small Oct-2019 14 Oct 2019 Python Small \n", "6 10_Nov 2020 Python Medium Nov-2020 10 Nov 2020 Python Medium\n", "7 12_Dec 2021 Boa Medium Dec-2021 12 Dec 2021 Boa Medium " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "library(tidyverse) # or library(tidyr)\n", "\n", "dataf <- dataf %>%\n", " unite(\"DM\", Date:Month)\n", "\n", "dataf" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 }