{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Imports: pandas provides the Series structure demonstrated below\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pandas Data Structure - Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1\n",
      "1 2\n",
      "2 3\n",
      "3 -3\n",
      "4 4\n",
      "5 0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# build a Series from a plain list; no index is given, so pandas assigns 0..5\n",
    "sr = pd.Series([1, 2, 3, -3, 4, 0])\n",
    "print(sr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 1 2 3 -3 4 0]\n"
     ]
    }
   ],
   "source": [
    "# get values from Series\n",
    "print(sr.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RangeIndex(start=0, stop=6, step=1)\n"
     ]
    }
   ],
   "source": [
    "# get the index from Series\n",
    "print(sr.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2nd indexed value: 3 \n",
      "adding 2nd + 4th index: 7\n"
     ]
    }
   ],
   "source": [
    "# some calculations on Series elements, looked up by their integer index labels\n",
    "print(\"2nd indexed value:\", sr[2],\n",
    "      \"\\nadding 2nd + 4th index:\", sr[2] + sr[4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a 1\n",
      "b 2\n",
      "c 0\n",
      "d -3\n",
      "e 4\n",
      "f -6\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# creating Series with named index\n",
    "sr1 = pd.Series([1, 2, 0, -3, 4, -6], index=['a', 'b', 'c', 'd', 'e', 'f'])\n",
    "print(sr1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a \t 1\n",
      "b \t 2\n",
      "c \t 0\n",
      "d \t -3\n",
      "e \t 4\n",
      "f \t -6\n"
     ]
    }
   ],
   "source": [
    "# walk the Series as (label, value) pairs instead of re-indexing by each label\n",
    "for label, value in sr1.items():\n",
    "    print(label, \"\\t\", value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a 1\n",
      "b 2\n",
      "c 0\n",
      "e 4\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# getting only non-negative values from Series (the >= mask keeps zero as well)\n",
    "print(sr1[sr1 >= 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'Texas': 71000, 'Oregon': 16000, 'Ohio': 35000, 'Utah': 5000} <class 'dict'>\n"
     ]
    }
   ],
   "source": [
    "# we can convert a dictionary into Series\n",
    "sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}\n",
    "print(sdata, type(sdata))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ohio 35000\n",
      "Oregon 16000\n",
      "Texas 71000\n",
      "Utah 5000\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# the dictionary keys become the index labels of the Series\n",
    "sr2 = pd.Series(sdata)\n",
    "print(sr2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "California NaN\n",
      "Alabama NaN\n",
      "Oregon 16000.0\n",
      "Texas 71000.0\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# an explicit index selects and orders the labels; labels absent from sdata get NaN\n",
    "ind = ['California', 'Alabama', 'Oregon', 'Texas']\n",
    "sr3 = pd.Series(sdata, index=ind)\n",
    "print(sr3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "California True\n",
      "Alabama True\n",
      "Oregon False\n",
      "Texas False\n",
      "dtype: bool\n"
     ]
    }
   ],
   "source": [
    "# California's and Alabama's values are NaN (Not a Number) because sdata has no entry for these labels\n",
    "# let's check which entries are null\n",
    "print(pd.isnull(sr3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "California False\n",
      "Alabama False\n",
      "Oregon True\n",
      "Texas True\n",
      "dtype: bool\n"
     ]
    }
   ],
   "source": [
    "# the complementary check: True where a value is present\n",
    "print(pd.notnull(sr3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Alabama NaN\n",
      "California NaN\n",
      "Ohio NaN\n",
      "Oregon 32000.0\n",
      "Texas 142000.0\n",
      "Utah NaN\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# arithmetic aligns on index labels; a label without a value in both operands gives NaN\n",
    "print(sr2 + sr3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "States\n",
      "California NaN\n",
      "Alabama NaN\n",
      "Oregon 16000.0\n",
      "Texas 71000.0\n",
      "Name: Population, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# a Series object can have a name (for its values) and an index name (for its labels)\n",
    "# here the values are population figures and the index labels are states\n",
    "sr3.name = \"Population\"\n",
    "sr3.index.name = \"States\"\n",
    "print(sr3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Population \t (4,) \t 4\n"
     ]
    }
   ],
   "source": [
    "# Stats about Series\n",
    "print(sr3.name, \"\\t\", sr3.shape, \"\\t\", len(sr3))  # print name, shape and length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['California', 'Alabama', 'Oregon', 'Texas'], dtype='object', name='States') \t States\n"
     ]
    }
   ],
   "source": [
    "print(sr3.index, \"\\t\", sr3.index.name)  # print index and index name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "float64\n"
     ]
    }
   ],
   "source": [
    "print(sr3.dtype)  # print data type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ nan nan 16000. 71000.]\n"
     ]
    }
   ],
   "source": [
    "print(sr3.values)  # print Series values"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}