{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Frequencies in R, part 1\n",
    "\n",
    "From Steve Miller's post at\n",
    "http://www.dataversity.net/frequencies-r-part-1/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup\n",
    "\n",
    "Install any missing packages, load the libraries, and point the session at the data directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install only the packages that are not already present, so that\n",
    "# re-running the notebook does not download everything again.\n",
    "required_pkgs <- c(\"data.table\", \"stringr\", \"feather\", \"pryr\", \"readr\", \"dtplyr\")\n",
    "missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))\n",
    "if (length(missing_pkgs) > 0) {\n",
    "    install.packages(missing_pkgs)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "library(data.table)\n",
    "library(stringr)\n",
    "library(feather)\n",
    "suppressMessages(library(pryr))\n",
    "suppressMessages(library(readr))\n",
    "suppressMessages(library(dtplyr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory that holds the tab-separated .txt files.\n",
    "# Set the MEDICAID_DATA_DIR environment variable to override it;\n",
    "# the default is the path the notebook was originally written against.\n",
    "data_dir <- Sys.getenv(\"MEDICAID_DATA_DIR\",\n",
    "                       unset = \"C://Users//v-thbeta//Desktop//examples//data//medicaid\")\n",
    "setwd(data_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "`mkdata` reads one file; the cell after it reads every `.txt` file in the working directory and stacks the results into `md_df`.\n",
    "\n",
    "**TODO:** an earlier run reported 10612 readr parsing failures in column `X20` (values such as `.7` where no trailing characters were expected). Inspect `problems()` on the individual reads before relying on that column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mkdata <- function(text)\n",
    "{\n",
    "    # Read one tab-separated file into a tbl_dt.\n",
    "    #\n",
    "    # text: name of the file to read.\n",
    "    # Returns the tbl_dt. Column names are the header names, lower-cased and\n",
    "    # with underscores removed. A `year` column is set when the file name\n",
    "    # (the part before the first \".\") ends in four digits; otherwise a\n",
    "    # warning is raised and the table is returned without touching `year`.\n",
    "    slug <- strsplit(text, \"[.]\")[[1]][1]\n",
    "    # NOTE(review): assumes the year is the trailing four digits of the\n",
    "    # file name - confirm against the actual file naming.\n",
    "    year_txt <- regmatches(slug, regexpr(\"[0-9]{4}$\", slug))\n",
    "\n",
    "    # Column names come from the header line; the data itself is read with\n",
    "    # the first two lines skipped, as in the original code.\n",
    "    suppressMessages(cols <- names(read_tsv(text, n_max = 1)))\n",
    "    suppressMessages(dt <- tbl_dt(read_tsv(text, skip = 2, col_names = FALSE)))\n",
    "    setnames(dt, gsub(\"_\", \"\", tolower(cols)))\n",
    "\n",
    "    if (length(year_txt) == 1) {\n",
    "        # Column names have had underscores stripped, so `file_year` can\n",
    "        # never be mistaken for a column inside the data.table call.\n",
    "        file_year <- as.integer(year_txt)\n",
    "        dt[, year := file_year]\n",
    "    } else {\n",
    "        warning(\"no trailing 4-digit year in file name: \", text)\n",
    "    }\n",
    "    dt\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `pattern` is a regular expression, not a glob, so anchor on the extension.\n",
    "txt_files <- list.files(path = \".\", pattern = \"[.]txt$\")\n",
    "if (length(txt_files) == 0) {\n",
    "    stop(\"no .txt files found in \", getwd())\n",
    "}\n",
    "md_df <- tbl_dt(rbindlist(lapply(txt_files, mkdata),\n",
    "                          use.names = TRUE, fill = TRUE))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R 3.3",
   "language": "R",
   "name": "ir33"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.3.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}