{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Interchange Examples" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ " " ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "INSTALLED VERSIONS\n", "------------------\n", "commit: None\n", "python: 3.6.3.final.0\n", "python-bits: 64\n", "OS: Linux\n", "OS-release: 3.10.0-693.11.1.el7.x86_64\n", "machine: x86_64\n", "processor: x86_64\n", "byteorder: little\n", "LC_ALL: None\n", "LANG: en_US.UTF-8\n", "LOCALE: en_US.UTF-8\n", "\n", "pandas: 0.20.3\n", "pytest: 3.2.1\n", "pip: 9.0.1\n", "setuptools: 38.2.4\n", "Cython: 0.26.1\n", "numpy: 1.13.3\n", "scipy: 0.19.1\n", "xarray: None\n", "IPython: 6.2.1\n", "sphinx: 1.6.3\n", "patsy: 0.4.1\n", "dateutil: 2.6.1\n", "pytz: 2017.2\n", "blosc: None\n", "bottleneck: 1.2.1\n", "tables: 3.4.2\n", "numexpr: 2.6.2\n", "feather: 0.4.0\n", "matplotlib: 2.1.0\n", "openpyxl: 2.4.8\n", "xlrd: 1.1.0\n", "xlwt: 1.3.0\n", "xlsxwriter: 1.0.2\n", "lxml: 4.1.0\n", "bs4: 4.6.0\n", "html5lib: 0.999999999\n", "sqlalchemy: 1.1.13\n", "pymysql: None\n", "psycopg2: None\n", "jinja2: 2.9.6\n", "s3fs: None\n", "pandas_gbq: None\n", "pandas_datareader: None\n" ] } ], "source": [ "import feather\n", "import pandas as pd\n", "import saspy\n", "pd.show_versions()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using SAS Config named: default\n", "SAS Connection established. Subprocess id is 4942\n", "\n" ] } ], "source": [ "sas = saspy.SASsession()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Example from:\n", "https://github.com/sassoftware/saspy/blob/master/saspy_example_github.ipynb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "cars = sas.sasdata('cars', libref='sashelp')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ " print(type(cars))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "SAS dataset SASHELP.CARS converted to panda dataframe car_df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "car_df = cars.to_df()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ " print(type(car_df))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "panda dataframe car_df converted to SAS dataset WORK.CARS" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "car_df2 = sas.df2sd(car_df, 'cars')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ " print(type(car_df2))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read a dataframe from disk with the feather library created previously from a R dataframe" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "path = '/home/sas/notebook/r_staff.feather'\n", "pd_staff = feather.read_dataframe(path)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "employee category\n", "salary float64\n", "startdate datetime64[ns]\n", "stringsASFactors bool\n", "dtype: object" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd_staff.dtypes" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print(type(pd_staff))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " employee salary startdate stringsASFactors\n", "0 Thomas Gunther 27500.0 2010-11-01 False\n", "1 Nicholas Harbinger 33900.0 2008-03-25 False\n", "2 Gisela Benito 28000.0 2007-03-14 False\n", "3 Herbert Rudelich 35000.0 2011-01-24 False\n", "4 Emily Sirignano 36350.0 2009-12-15 False\n", "5 Michael Morrison 40000.0 2009-10-10 False\n", "6 Jacqueline Onieda 36500.0 2015-01-02 False\n" ] } ], "source": [ "print(pd_staff)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The dataframe pd_staff appears acceptable to Python, but is not acceptable to the sas.df2sd call" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "ename": "NotImplementedError", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msd_staff\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf2sd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd_staff\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'staff'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdf2sd\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 618\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSASdata\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \"\"\"\n\u001b[0;32m--> 620\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 622\u001b[0m def dataframe2sasdata(self, df: 'pd.DataFrame', table: str = '_df', libref: str = '',\n", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 637\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 639\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_io\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 640\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasiostdio.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref)\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[0minput\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"'\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"'n \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 895\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtypes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'O'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'S'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'U'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'V'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 896\u001b[0;31m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ignore'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 897\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, arg, na_action)\u001b[0m\n\u001b[1;32m 2127\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2128\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mna_action\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2129\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2130\u001b[0m \u001b[0mmap_f\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2131\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNotImplementedError\u001b[0m: " ] } ], "source": [ "sd_staff = sas.df2sd(pd_staff, 'staff')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Copy the pd_staff dataframe with default deep=True" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "140465635419640" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "copy_deep_true = pd_staff.copy()\n", "id(pd_staff)\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/plain": [ "140464967919432" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(type(copy_deep_true))\n", "id(copy_deep_true)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Call the sas.df2sd method. The copied dataframe is not acceptable to this call." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "ename": "NotImplementedError", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcdt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf2sd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcopy_deep_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'staff_t'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdf2sd\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 618\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSASdata\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \"\"\"\n\u001b[0;32m--> 620\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 622\u001b[0m def dataframe2sasdata(self, df: 'pd.DataFrame', table: str = '_df', libref: str = '',\n", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 637\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 639\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_io\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 640\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasiostdio.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref)\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[0minput\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"'\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"'n \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 895\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtypes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'O'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'S'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'U'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'V'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 896\u001b[0;31m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ignore'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 897\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, arg, na_action)\u001b[0m\n\u001b[1;32m 2127\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2128\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mna_action\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2129\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2130\u001b[0m \u001b[0mmap_f\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2131\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNotImplementedError\u001b[0m: " ] } ], "source": [ "cdt = sas.df2sd(copy_deep_true, 'staff_t')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Copy pd_staff operation with deep=false parameter" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "140465635419640" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "copy_deep_false = pd_staff.copy(deep=False)\n", "id(pd_staff)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/plain": [ "140464967921000" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(type(copy_deep_false))\n", "id(copy_deep_false)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Call the sas.df2sd method. copy_deep_false dataframe is not acceptable." ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "ename": "NotImplementedError", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf2sd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcopy_deep_false\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'staff_f'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdf2sd\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 618\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSASdata\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \"\"\"\n\u001b[0;32m--> 620\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 622\u001b[0m def dataframe2sasdata(self, df: 'pd.DataFrame', table: str = '_df', libref: str = '',\n", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref, results)\u001b[0m\n\u001b[1;32m 637\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 639\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_io\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe2sasdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 640\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibref\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/saspy/sasiostdio.py\u001b[0m in \u001b[0;36mdataframe2sasdata\u001b[0;34m(self, df, table, libref)\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[0minput\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"'\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"'n \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 895\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtypes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'O'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'S'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'U'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'V'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 896\u001b[0;31m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ignore'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 897\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[0mcol_l\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/root/anaconda3/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mmap\u001b[0;34m(self, arg, na_action)\u001b[0m\n\u001b[1;32m 2127\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2128\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mna_action\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2129\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2130\u001b[0m \u001b[0mmap_f\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2131\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNotImplementedError\u001b[0m: " ] } ], "source": [ "cdf = sas.df2sd(copy_deep_false, 'staff_f')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create the medals dataframe with the read.csv method" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df_medals = pd.read_csv(\"http://winterolympicsmedals.com/medals.csv\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
YearCitySportDisciplineNOCEventEvent genderMedal
01924ChamonixSkatingFigure skatingAUTindividualMSilver
11924ChamonixSkatingFigure skatingAUTindividualWGold
21924ChamonixSkatingFigure skatingAUTpairsXGold
31924ChamonixBobsleighBobsleighBELfour-manMBronze
41924ChamonixIce HockeyIce HockeyCANice hockeyMGold
\n", "
" ], "text/plain": [ " Year City Sport Discipline NOC Event Event gender \\\n", "0 1924 Chamonix Skating Figure skating AUT individual M \n", "1 1924 Chamonix Skating Figure skating AUT individual W \n", "2 1924 Chamonix Skating Figure skating AUT pairs X \n", "3 1924 Chamonix Bobsleigh Bobsleigh BEL four-man M \n", "4 1924 Chamonix Ice Hockey Ice Hockey CAN ice hockey M \n", "\n", " Medal \n", "0 Silver \n", "1 Gold \n", "2 Gold \n", "3 Bronze \n", "4 Gold " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_medals.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Call the sas.df2sd method to create the SAs dataset WORK.medals2" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "sd_medals = sas.df2sd(df_medals, 'medals2')" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Attributes': Member Label1 \\\n", " 0 WORK.MEDALS2 Data Set Name \n", " 1 WORK.MEDALS2 Member Type \n", " 2 WORK.MEDALS2 Engine \n", " 3 WORK.MEDALS2 Created \n", " 4 WORK.MEDALS2 Last Modified \n", " 5 WORK.MEDALS2 Protection \n", " 6 WORK.MEDALS2 Data Set Type \n", " 7 WORK.MEDALS2 Label \n", " 8 WORK.MEDALS2 Data Representation \n", " 9 WORK.MEDALS2 Encoding \n", " \n", " cValue1 nValue1 \\\n", " 0 WORK.MEDALS2 NaN \n", " 1 DATA NaN \n", " 2 V9 NaN \n", " 3 01/14/2018 15:12:11 1.831562e+09 \n", " 4 01/14/2018 15:12:11 1.831562e+09 \n", " 5 NaN \n", " 6 NaN \n", " 7 NaN \n", " 8 SOLARIS_X86_64, LINUX_X86_64, ALPHA_TRU64, LIN... NaN \n", " 9 latin1 Western (ISO) NaN \n", " \n", " Label2 cValue2 nValue2 \n", " 0 Observations 2311 2311.0 \n", " 1 Variables 8 8.0 \n", " 2 Indexes 0 0.0 \n", " 3 Observation Length 96 96.0 \n", " 4 Deleted Observations 0 0.0 \n", " 5 Compressed NO NaN \n", " 6 Sorted NO NaN \n", " 7 0.0 \n", " 8 0.0 \n", " 9 0.0 ,\n", " 'Enginehost': Member Label1 \\\n", " 0 WORK.MEDALS2 Data Set Page Size \n", " 1 WORK.MEDALS2 Number of Data Set Pages \n", " 2 WORK.MEDALS2 First Data Page \n", " 3 WORK.MEDALS2 Max Obs per Page \n", " 4 WORK.MEDALS2 Obs in First Data Page \n", " 5 WORK.MEDALS2 Number of Data Set Repairs \n", " 6 WORK.MEDALS2 Filename \n", " 7 WORK.MEDALS2 Release Created \n", " 8 WORK.MEDALS2 Host Created \n", " 9 WORK.MEDALS2 Inode Number \n", " 10 WORK.MEDALS2 Access Permission \n", " 11 WORK.MEDALS2 Owner Name \n", " 12 WORK.MEDALS2 File Size \n", " 13 WORK.MEDALS2 File Size (bytes) \n", " \n", " cValue1 nValue1 \n", " 0 65536 65536.0 \n", " 1 4 4.0 \n", " 2 1 1.0 \n", " 3 681 681.0 \n", " 4 652 652.0 \n", " 5 0 0.0 \n", " 6 /home/sas/tmp/SAS_work41FB0000134E_localhost.l... NaN \n", " 7 9.0401M4 NaN \n", " 8 Linux NaN \n", " 9 149271914 149271914.0 \n", " 10 rw-rw-r-- NaN \n", " 11 sas NaN \n", " 12 320KB NaN \n", " 13 327680 327680.0 ,\n", " 'Variables': Member Num Variable Type Len Pos\n", " 0 WORK.MEDALS2 2 City Char 22 8\n", " 1 WORK.MEDALS2 4 Discipline Char 15 40\n", " 2 WORK.MEDALS2 6 Event Char 31 58\n", " 3 WORK.MEDALS2 7 Event gender Char 1 89\n", " 4 WORK.MEDALS2 8 Medal Char 6 90\n", " 5 WORK.MEDALS2 5 NOC Char 3 55\n", " 6 WORK.MEDALS2 3 Sport Char 10 30\n", " 7 WORK.MEDALS2 1 Year Num 8 0}" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ " sd_medals.contents()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Access Method = STDIO\n", "SAS Config name = default\n", "WORK Path = /home/sas/tmp/SAS_work41FB0000134E_localhost.localdomain/\n", "SAS Version = 9.04.01M4P11092016\n", "SASPy Version = 2.2.1\n", "Teach me SAS = False\n", "Batch = False\n", "Results = Pandas\n", "SAS Session Encoding = LATIN1\n", "Python Encoding value = utf-8" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sas" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }