{
  "cells": [
    {
      "metadata": {
        "slideshow": {
          "slide_type": "slide"
        }
      },
      "cell_type": "markdown",
      "source": "## Working with dates: lubridate"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "https://rpubs.com/davoodastaraky/lubridate"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Working with strings: stringr"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "http://edrub.in/CheatSheets/cheatSheetStringr.pdf"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Working with categorical data"
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "# 2x2 table of counts: rows are sex, columns are smoking status\ndata <- data.frame(\n  Smoker     = c(72L, 34L),\n  Non_Smoker = c(44L, 53L),\n  row.names  = c(\"Male\", \"Female\")\n)\n# the table helpers used below are applied to the matrix form of the counts\ndataM <- as.matrix(data)",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": false
      },
      "cell_type": "code",
      "source": "print(dataM)",
      "execution_count": 2,
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": "       Smoker Non_Smoker\nMale       72         44\nFemale     34         53\n"
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### Calculate the sum of table entries using base functions"
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "margin.table(dataM) # grand total of all cells (no margin argument, so every entry is summed)",
      "execution_count": 3,
      "outputs": [
        {
          "data": {
            "text/html": "203",
            "text/latex": "203",
            "text/markdown": "203",
            "text/plain": "[1] 203"
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": false
      },
      "cell_type": "code",
      "source": "print(margin.table(dataM, 1)) # row totals: total males and total females\nprint(margin.table(dataM, 2)) # column totals: total smokers and total non-smokers",
      "execution_count": 8,
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": "  Male Female \n   116     87 \n    Smoker Non_Smoker \n       106         97 \n"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "addmargins(dataM) # append a Sum row and a Sum column holding the marginal totals",
      "execution_count": 16,
      "outputs": [
        {
          "data": {
            "text/html": "<table>\n<thead><tr><th></th><th scope=col>Smoker</th><th scope=col>Non_Smoker</th><th scope=col>Sum</th></tr></thead>\n<tbody>\n\t<tr><th scope=row>Male</th><td> 72</td><td>44 </td><td>116</td></tr>\n\t<tr><th scope=row>Female</th><td> 34</td><td>53 </td><td> 87</td></tr>\n\t<tr><th scope=row>Sum</th><td>106</td><td>97 </td><td>203</td></tr>\n</tbody>\n</table>\n",
            "text/latex": "\\begin{tabular}{r|lll}\n  & Smoker & Non\\_Smoker & Sum\\\\\n\\hline\n\tMale &  72 & 44  & 116\\\\\n\tFemale &  34 & 53  &  87\\\\\n\tSum & 106 & 97  & 203\\\\\n\\end{tabular}\n",
            "text/markdown": "\n| <!--/--> | Smoker | Non_Smoker | Sum | \n|---|---|---|\n| Male |  72 | 44  | 116 | \n| Female |  34 | 53  |  87 | \n| Sum | 106 | 97  | 203 | \n\n\n",
            "text/plain": "       Smoker Non_Smoker Sum\nMale    72    44         116\nFemale  34    53          87\nSum    106    97         203"
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ]
    },
    {
      "metadata": {
        "scrolled": false,
        "trusted": false
      },
      "cell_type": "code",
      "source": "ftable(dataM)",
      "execution_count": 20,
      "outputs": [
        {
          "data": {
            "text/plain": "        Smoker Non_Smoker\n                         \nMale        72         44\nFemale      34         53"
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ]
    },
    {
      "metadata": {
        "scrolled": false,
        "trusted": false
      },
      "cell_type": "code",
      "source": "ftable(addmargins(dataM))",
      "execution_count": 19,
      "outputs": [
        {
          "data": {
            "text/plain": "        Smoker Non_Smoker Sum\n                             \nMale        72         44 116\nFemale      34         53  87\nSum        106         97 203"
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ]
    },
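    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "prop.table(table(dataM)) # caution: table() tabulates the four cell counts themselves (each occurs once, hence 0.25 each); this form suits a single raw variable, not an already-aggregated two-way table",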
      "execution_count": 25,
      "outputs": [
        {
          "data": {
            "text/plain": "dataM\n  34   44   53   72 \n0.25 0.25 0.25 0.25 "
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ]
    },
    {
      "metadata": {
        "scrolled": false,
        "trusted": false
      },
      "cell_type": "code",
      "source": "prop.table(dataM, 1)",
      "execution_count": 21,
      "outputs": [
        {
          "data": {
            "text/html": "<table>\n<thead><tr><th></th><th scope=col>Smoker</th><th scope=col>Non_Smoker</th></tr></thead>\n<tbody>\n\t<tr><th scope=row>Male</th><td>0.6206897</td><td>0.3793103</td></tr>\n\t<tr><th scope=row>Female</th><td>0.3908046</td><td>0.6091954</td></tr>\n</tbody>\n</table>\n",
            "text/latex": "\\begin{tabular}{r|ll}\n  & Smoker & Non\\_Smoker\\\\\n\\hline\n\tMale & 0.6206897 & 0.3793103\\\\\n\tFemale & 0.3908046 & 0.6091954\\\\\n\\end{tabular}\n",
            "text/markdown": "\n| <!--/--> | Smoker | Non_Smoker | \n|---|---|\n| Male | 0.6206897 | 0.3793103 | \n| Female | 0.3908046 | 0.6091954 | \n\n\n",
            "text/plain": "       Smoker    Non_Smoker\nMale   0.6206897 0.3793103 \nFemale 0.3908046 0.6091954 "
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "prop.table(dataM, 2)",
      "execution_count": 23,
      "outputs": [
        {
          "data": {
            "text/html": "<table>\n<thead><tr><th></th><th scope=col>Smoker</th><th scope=col>Non_Smoker</th></tr></thead>\n<tbody>\n\t<tr><th scope=row>Male</th><td>0.6792453</td><td>0.4536082</td></tr>\n\t<tr><th scope=row>Female</th><td>0.3207547</td><td>0.5463918</td></tr>\n</tbody>\n</table>\n",
            "text/latex": "\\begin{tabular}{r|ll}\n  & Smoker & Non\\_Smoker\\\\\n\\hline\n\tMale & 0.6792453 & 0.4536082\\\\\n\tFemale & 0.3207547 & 0.5463918\\\\\n\\end{tabular}\n",
            "text/markdown": "\n| <!--/--> | Smoker | Non_Smoker | \n|---|---|\n| Male | 0.6792453 | 0.4536082 | \n| Female | 0.3207547 | 0.5463918 | \n\n\n",
            "text/plain": "       Smoker    Non_Smoker\nMale   0.6792453 0.4536082 \nFemale 0.3207547 0.5463918 "
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "prop.table(dataM)",
      "execution_count": 22,
      "outputs": [
        {
          "data": {
            "text/html": "<table>\n<thead><tr><th></th><th scope=col>Smoker</th><th scope=col>Non_Smoker</th></tr></thead>\n<tbody>\n\t<tr><th scope=row>Male</th><td>0.3546798</td><td>0.2167488</td></tr>\n\t<tr><th scope=row>Female</th><td>0.1674877</td><td>0.2610837</td></tr>\n</tbody>\n</table>\n",
            "text/latex": "\\begin{tabular}{r|ll}\n  & Smoker & Non\\_Smoker\\\\\n\\hline\n\tMale & 0.3546798 & 0.2167488\\\\\n\tFemale & 0.1674877 & 0.2610837\\\\\n\\end{tabular}\n",
            "text/markdown": "\n| <!--/--> | Smoker | Non_Smoker | \n|---|---|\n| Male | 0.3546798 | 0.2167488 | \n| Female | 0.1674877 | 0.2610837 | \n\n\n",
            "text/plain": "       Smoker    Non_Smoker\nMale   0.3546798 0.2167488 \nFemale 0.1674877 0.2610837 "
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Working with categorical data: using the `gmodels` package"
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "# One-time setup if gmodels is not installed: install.packages(\"gmodels\")\nlibrary(gmodels)",
      "execution_count": 14,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "CrossTable(dataM) # full contingency table via CrossTable() from gmodels: counts, chi-square contributions, and row/column/table proportions",
      "execution_count": 15,
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": "\n \n   Cell Contents\n|-------------------------|\n|                       N |\n| Chi-square contribution |\n|           N / Row Total |\n|           N / Col Total |\n|         N / Table Total |\n|-------------------------|\n\n \nTotal Observations in Table:  203 \n\n \n             |  \n             |     Smoker | Non_Smoker |  Row Total | \n-------------|------------|------------|------------|\n        Male |         72 |         44 |        116 | \n             |      2.156 |      2.356 |            | \n             |      0.621 |      0.379 |      0.571 | \n             |      0.679 |      0.454 |            | \n             |      0.355 |      0.217 |            | \n-------------|------------|------------|------------|\n      Female |         34 |         53 |         87 | \n             |      2.875 |      3.142 |            | \n             |      0.391 |      0.609 |      0.429 | \n             |      0.321 |      0.546 |            | \n             |      0.167 |      0.261 |            | \n-------------|------------|------------|------------|\nColumn Total |        106 |         97 |        203 | \n             |      0.522 |      0.478 |            | \n-------------|------------|------------|------------|\n\n \n"
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## data.table"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "https://s3.amazonaws.com/assets.datacamp.com/img/blog/data+table+cheat+sheet.pdf"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Piping with `%>%` from the `magrittr` package"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## dplyr"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "![Imgur](https://i.imgur.com/DNMxDWh.png?1)"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "**Course:**  \nhttps://courses.edx.org/courses/course-v1:HarvardX+PH125.6x+2T2019/course/  "
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "**Data transformation**  \nhttps://r4ds.had.co.nz/transform.html  "
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Working with fivethirtyeight data"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "http://www.storybench.org/getting-started-with-tidyverse-in-r/  \nhttp://www.storybench.org/how-to-explore-a-dataset-from-the-fivethirtyeight-package-in-r/  "
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "<span style=\"color:red; font-family:Comic Sans MS\">References</span>  \n<a href=\"https://tbrieder.org/epidata/course_reading/e_aragon.pdf\" target=\"_blank\">https://tbrieder.org/epidata/course_reading/e_aragon.pdf</a>"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "<span style=\"color:red; font-family:brandon\">Further Resources</span>  \n<a href=\"https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf\" target=\"_blank\">https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf</a>  \n<a href=\"https://ugoproto.github.io/ugo_r_doc/dplyr.pdf\" target=\"_blank\">https://ugoproto.github.io/ugo_r_doc/dplyr.pdf</a>  \n<a href=\"https://rpubs.com/bradleyboehmke/data_wrangling\" target=\"_blank\">https://rpubs.com/bradleyboehmke/data_wrangling</a>  "
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "celltoolbar": "Slideshow",
    "hide_input": false,
    "kernelspec": {
      "name": "r",
      "display_name": "R",
      "language": "R"
    },
    "language_info": {
      "mimetype": "text/x-r-source",
      "name": "R",
      "pygments_lexer": "r",
      "version": "3.5.3",
      "file_extension": ".r",
      "codemirror_mode": "r"
    },
    "toc": {
      "nav_menu": {},
      "number_sections": true,
      "sideBar": true,
      "skip_h1_title": false,
      "base_numbering": 1,
      "title_cell": "Table of Contents",
      "title_sidebar": "Contents",
      "toc_cell": false,
      "toc_position": {},
      "toc_section_display": true,
      "toc_window_display": false
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}