{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "#### Data Science with Microsoft SQL Server 2016\n", "\n", "\"file:\\\\\\C:\\Users\\v-thbeta\\Downloads\\9781509304318_Data%20Science%20with%20Microsoft%20SQL%20Server%202016_pdf.pdf\"" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "library(RevoScaleR)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#connStr\n", "instance_name <- \"TRB_MICROSOFT\"\n", "database_name <- \"WideWorldImportersDW\"\n", "connStr <- paste(\"Driver={SQL Server Native Client 11.0};Server=\",\n", " instance_name, \";Database=\",database_name,\";Trusted_Connection=yes;\",sep=\"\");" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "TRUE" ], "text/latex": [ "TRUE" ], "text/markdown": [ "TRUE" ], "text/plain": [ "[1] TRUE" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## Open a connection with SQL Server to be able to write queries with the rxExecuteSQLDDL function.\n", "\n", "outOdbcDS <- RxOdbcData(table = \"NewData\", connectionString = connStr, useFastRead=TRUE)\n", "rxOpen(outOdbcDS, \"w\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Create a variable to store the data returned from the SQL Server, with the user’s name,\n", "# a variable for the parameters to pass to the SQL Server,\n", "# the values you can pass to the RxSQLServerdata constructor\n", "sqlShareDir <- paste(\"C:\\\\temp\\\\\",Sys.getenv(\"USERNAME\"),sep=\"\")\n", "sqlWait <- TRUE\n", "sqlConsoleOutput <- FALSE" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "'C:\\temp\\v-thbeta'" ], "text/latex": [ "'C:\\textbackslash{}temp\\textbackslash{}v-thbeta'" ], "text/markdown": [ "'C:\\temp\\v-thbeta'" ], "text/plain": [ "[1] \"C:\\\\temp\\\\v-thbeta\"" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sqlShareDir" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Now we’ll set the compute context for the data object, using all the variables\n", "# we just created.\n", "cc <- RxInSqlServer(connectionString = connStr, shareDir = sqlShareDir, wait = sqlWait, consoleOutput =\n", "sqlConsoleOutput)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Next we can set the compute context to point to SQL Server R Services, defined earlier.\n", "rxSetComputeContext(cc)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# We can then construct the T-SQL query. This one simply brings back three columns.\n", "sampleDataQuery <- \"select Col1, Col2, Col3 from MyTableName\"" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Finally we run the query, using all of the objects set up in the script.\n", "# Note that we’re using a colClasses variable to convert the data types to something\n", "# R understands, since SQL Server has more datatypes than R, and we’re reading 500 rows\n", "# at a time.\n", "inDataSource <- RxSqlServerData(sqlQuery = sampleDataQuery, connectionString = connStr,\n", "colClasses = c(Col1 = \"numeric\", Col2 = \"numeric\", Col3 = \"numeric\"), rowsPerRead=500)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "FALSE" ], "text/latex": [ "FALSE" ], "text/markdown": [ "FALSE" ], "text/plain": [ "[1] FALSE" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# If I am a new user I might assume at the end of this exercise, I would have extracted rows and columns from SQL/Server\n", "# and expected an R dataframe to be returned. They would have guessed wrong.\n", "is.data.frame(inDataSource)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Formal class 'RxSqlServerData' [package \"RevoScaleR\"] with 23 slots\n", " ..@ inSqlServer : logi(0) \n", " ..@ computeSqlQueryOnly : logi(0) \n", " ..@ table : NULL\n", " ..@ sqlQuery : chr \"select Col1, Col2, Col3 from MyTableName\"\n", " ..@ useFastRead : logi TRUE\n", " ..@ trimSpace : logi TRUE\n", " ..@ server : NULL\n", " ..@ dbmsName : NULL\n", " ..@ databaseName : NULL\n", " ..@ dsn : NULL\n", " ..@ user : NULL\n", " ..@ password : NULL\n", " ..@ connectionString : chr \"Driver={SQL Server Native Client 11.0};Server=TRB_MICROSOFT;Database=WideWorldImportersDW;Trusted_Connection=yes;\"\n", " ..@ rowBuffering : logi TRUE\n", " ..@ writeFactorsAsIndexes: logi FALSE\n", " ..@ isolationLevel : NULL\n", " ..@ id : \n", " ..@ colClasses : Named chr [1:3] \"numeric\" \"numeric\" \"numeric\"\n", " .. ..- attr(*, \"names\")= chr [1:3] \"Col1\" \"Col2\" \"Col3\"\n", " ..@ colInfo : NULL\n", " ..@ returnDataFrame : logi TRUE\n", " ..@ stringsAsFactors : logi FALSE\n", " ..@ rowsOrBlocksPerRead : int 500\n", " ..@ compatibilityRequest :Classes 'CompatibilityRequest', 'R6' \n", " Public:\n", " assertServerCapability: function (capability, notSupported, notKnown) \n", " clone: function (deep = FALSE) \n", " deferredAssertServerCapability: function (capability, notSupported, notKnown) \n", " getRequestedCapabilities: function () \n", " initialize: function (server, notSupported = capabilityNotSupported, notKnown = serverNotKnown) \n", " merge: function (request) \n", " requestCapability: function (capability) \n", " runDeferredAssertions: function (server) \n", " serialize: function (file) \n", " Private:\n", " deferredRequests: list\n", " notKnown: function (server, capability, warningMessage) \n", " notSupported: function (server, capability, errorMessage) \n", " requestedCapabilities: \n", " runCallback: function (type, server, capability, userHandler) \n", " server: ServerDefinition, AbstractServerDefinition \n" ] } ], "source": [ "# For a new user, they need to begin understanding the object that was just created.\n", "str(inDataSource)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[Microsoft][SQL Server Native Client 11.0][SQL Server]Invalid object name 'MyTableName'.\n", "[Microsoft][SQL Server Native Client 11.0][SQL Server]Statement(s) could not be prepared.\n", "[Microsoft][SQL Server Native Client 11.0][SQL Server]Invalid object name 'MyTableName'.\n", "[Microsoft][SQL Server Native Client 11.0][SQL Server]Statement(s) could not be prepared.\n", "\n", "Could not open data source.\n" ] }, { "ename": "ERROR", "evalue": "Error in doTryCatch(return(expr), name, parentenv, handler): Could not open data source.\n", "output_type": "error", "traceback": [ "Error in doTryCatch(return(expr), name, parentenv, handler): Could not open data source.\nTraceback:\n", "1. rxImport(inDataSource)", "2. rxImportBase(inSource = inData, outSource = outFile, rowSelection = rowSelection, \n . transforms = transforms, transformFunc = transformFunc, transformVars = transformVars, \n . transformEnvir = transformEnvir, transformPackages = transformPackages, \n . transformObjects = transformObjects, append = append, overwrite = overwrite, \n . numRows = numRows, reportProgress = reportProgress, verbose = verbose, \n . maxRowsByCols = maxRowsByCols, xdfCompressionLevel = xdfCompressionLevel, \n . createCompositeSet = createCompositeSet, blocksPerCompositeFile = blocksPerCompositeFile)", "3. rxCall(\"RxImportDataSource\", params)", "4. tryCatch(.Call(sym, ..., PACKAGE = PACKAGE), interrupt = function(x) {\n . .C(\"RxUserBreak\", ..., PACKAGE = PACKAGE)\n . stop(\"RevoScaleR function interrupted\", call. = FALSE)\n . })", "5. tryCatchList(expr, classes, parentenv, handlers)", "6. tryCatchOne(expr, names, parentenv, handlers[[1L]])", "7. doTryCatch(return(expr), name, parentenv, handler)" ] } ], "source": [ "# A new user can't really do anything with the \"inDataSource\" object and since this chapter ends on page 14, a logical guess\n", "# is to call rxImport on the object. Unfortunately, the user is not warned the example references a non-existent table object. \n", "# And as part of the overall set-up, they were not told to load 'MyTableName' into SQL/Server or alter the example that maps\n", "# to an existing table!\n", "\n", "df1 <-rxImport(inDataSource)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R 3.3", "language": "R", "name": "ir33" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "3.3.2" } }, "nbformat": 4, "nbformat_minor": 2 }