{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NumPy Basics\n",
    "\n",
    "A short tour of NumPy's `ndarray`: creating arrays, vectorized arithmetic, data types, indexing, and basic statistics. The notebook is meant to be run top to bottom on a fresh kernel (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fixed seed so the random draws in the statistics section are reproducible.\n",
    "SEED = 42"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ndarray\n",
    "\n",
    "Arrays are important because they enable you to express batch operations on data without writing any `for` loops. This is usually called **vectorization**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 3, 4, 5, 6, 7])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build a 1-D array from a Python list.\n",
    "data1 = [2, 3, 4, 5, 6, 7]\n",
    "np.array(data1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 2, 3, 4],\n",
       "       [5, 6, 7, 8]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# A sequence of equal-length rows becomes a 2-D array.\n",
    "data2 = ([1, 2, 3, 4], [5, 6, 7, 8])\n",
    "arr2 = np.array(data2)\n",
    "arr2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2, 4)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.zeros(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.,  1.,  1.],\n",
       "       [ 1.,  1.,  1.]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.ones((2, 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.arange(15)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Vectorized arithmetic\n",
    "\n",
    "Arithmetic operators act element by element on whole arrays."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2, 4, 6],\n",
       "       [3, 5, 9]])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = np.array([[2, 4, 6],\n",
    "                 [3, 5, 9]])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 6, 12, 18],\n",
       "       [ 9, 15, 27]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data + data + data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 4,  8, 12],\n",
       "       [ 6, 10, 18]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data * 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2, 3)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('int64')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0  1  2\n",
       "0  2  4  6\n",
       "1  3  5  9"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# A 2-D array can be wrapped directly in a DataFrame.\n",
    "df = pd.DataFrame(data)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data types for ndarrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.  ,  2.  ,  4.5 ],\n",
       "       [ 1.1 ,  3.4 ,  3.9 ],\n",
       "       [ 0.  ,  0.88,  0.45]])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mixing ints and floats in the input yields a float array.\n",
    "float_rows = ([1, 2, 4.5], [1.1, 3.4, 3.9], [0, 0.88, 0.45])\n",
    "arr1 = np.array(float_rows)\n",
    "arr1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('float64')"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr1.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 2, 4],\n",
       "       [1, 3, 3],\n",
       "       [0, 0, 0]], dtype=int32)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Casting float -> int drops the fractional part; astype returns a new array.\n",
    "arr1.astype(np.int32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Subsetting / Slicing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1. ,  2. ,  4.5])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr1[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.0"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row 0, column 1 in a single indexing step (same element as arr1[0][1]).\n",
    "arr1[0, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1.1,  3.4,  3.9])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr1[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mathematical and Statistical Methods\n",
    "\n",
    "The cells below work on a small random sample. The generator is seeded in the same cell that draws the sample, so re-running that cell (or the whole notebook) reproduces the same values. Outputs of this section are regenerated by Run All."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed and draw together so this cell is idempotent.\n",
    "# randn already returns float64, so no cast is needed.\n",
    "np.random.seed(SEED)\n",
    "samples = np.random.randn(2, 3)\n",
    "samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# (3, 2) dot (2, 3) -> symmetric (3, 3) matrix.\n",
    "arr3 = np.dot(samples.T, samples)\n",
    "arr3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`arr3` may contain negative off-diagonal entries. `np.sqrt` returns `nan` for negative inputs and emits a `RuntimeWarning`; that warning is expected here, so it is silenced for this one call only."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with np.errstate(invalid=\"ignore\"):\n",
    "    arr3_sqrt = np.sqrt(arr3)\n",
    "arr3_sqrt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "samples.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "samples.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "samples.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sum down the rows: one total per column.\n",
    "samples.sum(axis=0)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}