{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NumPy basics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make print() a function on Python 2 as well, so that print(\"label\", value)\n",
    "# shows 'label value' instead of the repr of a 2-tuple.\n",
    "from __future__ import print_function\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "# Set seed for reproducibility\n",
    "np.random.seed(seed=123)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Scalars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sk_id_curr:  100001\n",
      "sk_id_curr ndim:  0\n",
      "sk_id_curr shape: ()\n",
      "sk_id_curr size:  1\n",
      "sk_id_curr dtype:  int64\n"
     ]
    }
   ],
   "source": [
    "sk_id_curr = np.array(100001)\n",
    "print(\"sk_id_curr: \", sk_id_curr)\n",
    "\n",
    "# Number of dimensions\n",
    "print(\"sk_id_curr ndim: \", sk_id_curr.ndim)\n",
    "\n",
    "# Shape (length along each dimension)\n",
    "print(\"sk_id_curr shape:\", sk_id_curr.shape)\n",
    "\n",
    "# Total number of elements\n",
    "print(\"sk_id_curr size: \", sk_id_curr.size)\n",
    "\n",
    "# Data type\n",
    "print(\"sk_id_curr dtype: \", sk_id_curr.dtype)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1-D array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sk_id_curr: [100001 100005 100013 100028 100038 100042]\n",
      "sk_id_curr ndim:  1\n",
      "sk_id_curr shape: (6,)\n",
      "sk_id_curr size:  6\n",
      "sk_id_curr dtype:  int64\n"
     ]
    }
   ],
   "source": [
    "sk_id_curr = np.array([100001, 100005, 100013, 100028, 100038, 100042])\n",
    "print(\"sk_id_curr:\", sk_id_curr)\n",
    "print(\"sk_id_curr ndim: \", sk_id_curr.ndim)\n",
    "print(\"sk_id_curr shape:\", sk_id_curr.shape)\n",
    "print(\"sk_id_curr size: \", sk_id_curr.size)\n",
    "print(\"sk_id_curr dtype: \", sk_id_curr.dtype)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2-D array (matrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sk_id_curr: [[100001 100005]\n",
      " [100013 100028]\n",
      " [100038 100042]]\n",
      "sk_id_curr ndim:  2\n",
      "sk_id_curr shape: (3, 2)\n",
      "sk_id_curr size:  6\n",
      "sk_id_curr dtype:  int64\n"
     ]
    }
   ],
   "source": [
    "sk_id_curr = np.array([[100001, 100005], [100013, 100028], [100038, 100042]])\n",
    "print(\"sk_id_curr:\", sk_id_curr)\n",
    "print(\"sk_id_curr ndim: \", sk_id_curr.ndim)\n",
    "print(\"sk_id_curr shape:\", sk_id_curr.shape)\n",
    "print(\"sk_id_curr size: \", sk_id_curr.size)\n",
    "print(\"sk_id_curr dtype: \", sk_id_curr.dtype)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "np.zeros((2,2)): [[0. 0.]\n",
      " [0. 0.]]\n",
      "np.ones((2,2)): [[1. 1.]\n",
      " [1. 1.]]\n",
      "np.eye((2)): [[1. 0.]\n",
      " [0. 1.]]\n",
      "np.random.random((2,2)): [[0.69646919 0.28613933]\n",
      " [0.22685145 0.55131477]]\n"
     ]
    }
   ],
   "source": [
    "print(\"np.zeros((2,2)):\", np.zeros((2,2)))\n",
    "print(\"np.ones((2,2)):\", np.ones((2,2)))\n",
    "print(\"np.eye((2)):\", np.eye((2)))\n",
    "print(\"np.random.random((2,2)):\", np.random.random((2,2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Accessing\n",
    "\n",
    "## Indexing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sk_id_curr[0]:  100001\n",
      "sk_id_curr:  [     0 100005 100013 100028 100038 100042]\n"
     ]
    }
   ],
   "source": [
    "sk_id_curr = np.array([100001, 100005, 100013, 100028, 100038, 100042])\n",
    "print(\"sk_id_curr[0]: \", sk_id_curr[0])\n",
    "\n",
    "# Assigning through an index modifies the array in place\n",
    "sk_id_curr[0] = 0\n",
    "print(\"sk_id_curr: \", sk_id_curr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Slicing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[100001 100005 111111]\n",
      " [100013 100028 222222]\n",
      " [100038 100042 333333]]\n",
      "sk_id_curr column 1:  [100005 100028 100042]\n",
      "sk_id_curr row 0:  [100001 100005 111111]\n",
      "sk_id_curr rows 0,1,2 & cols 1,2: [[100005 111111]\n",
      " [100028 222222]\n",
      " [100042 333333]]\n"
     ]
    }
   ],
   "source": [
    "sk_id_curr = np.array([[100001, 100005, 111111], [100013, 100028, 222222], [100038, 100042, 333333]])\n",
    "print(sk_id_curr)\n",
    "\n",
    "# Index order is [rows, columns]; a bare ':' selects everything along that axis\n",
    "print(\"sk_id_curr column 1: \", sk_id_curr[:, 1])\n",
    "print(\"sk_id_curr row 0: \", sk_id_curr[0, :])\n",
    "print(\"sk_id_curr rows 0,1,2 & cols 1,2:\", sk_id_curr[:3, 1:3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Boolean array indexing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sk_id_curr: [[100001 100005 111111]\n",
      " [100013 100028 222222]\n",
      " [100038 100042 333333]]\n",
      "sk_id_curr > 111111: [[False False False]\n",
      " [False False  True]\n",
      " [False False  True]]\n",
      "sk_id_curr[sk_id_curr > 111111]: [222222 333333]\n"
     ]
    }
   ],
   "source": [
    "sk_id_curr = np.array([[100001, 100005, 111111], [100013, 100028, 222222], [100038, 100042, 333333]])\n",
    "print(\"sk_id_curr:\", sk_id_curr)\n",
    "\n",
    "# The comparison yields a boolean mask of the same shape;\n",
    "# indexing with the mask returns the matching elements as a 1-D array\n",
    "print(\"sk_id_curr > 111111:\", sk_id_curr > 111111)\n",
    "print(\"sk_id_curr[sk_id_curr > 111111]:\", sk_id_curr[sk_id_curr > 111111])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Array math\n",
    "\n",
    "## Basic math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x + y: [[2. 4.]\n",
      " [6. 8.]]\n",
      "x - y: [[0. 0.]\n",
      " [0. 0.]]\n",
      "x * y: [[ 1.  4.]\n",
      " [ 9. 16.]]\n"
     ]
    }
   ],
   "source": [
    "x = np.array([[1,2], [3,4]], dtype=np.float64)\n",
    "y = np.array([[1,2], [3,4]], dtype=np.float64)\n",
    "print(\"x + y:\", np.add(x, y)) # or x + y\n",
    "print(\"x - y:\", np.subtract(x, y)) # or x - y\n",
    "print(\"x * y:\", np.multiply(x, y)) # or x * y (element-wise, not a matrix product)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dot product"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 58.  64.]\n",
      " [139. 154.]]\n"
     ]
    }
   ],
   "source": [
    "a = np.array([[1,2,3], [4,5,6]], dtype=np.float64) # we can specify dtype\n",
    "b = np.array([[7,8], [9,10], [11, 12]], dtype=np.float64)\n",
    "\n",
    "# (2, 3) dot (3, 2) -> (2, 2)\n",
    "print(a.dot(b))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sum across a dimension"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2]\n",
      " [3 4]]\n",
      "sum all:  10\n",
      "sum by col:  [4 6]\n",
      "sum by row:  [3 7]\n"
     ]
    }
   ],
   "source": [
    "x = np.array([[1,2],[3,4]])\n",
    "print(x)\n",
    "print(\"sum all: \", np.sum(x)) # adds all elements\n",
    "print(\"sum by col: \", np.sum(x, axis=0)) # add numbers in each column\n",
    "print(\"sum by row: \", np.sum(x, axis=1)) # add numbers in each row"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Transposing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x: [[1 2]\n",
      " [3 4]]\n",
      "x.T: [[1 3]\n",
      " [2 4]]\n"
     ]
    }
   ],
   "source": [
    "# Uses the 2x2 integer array x defined in the previous cell\n",
    "print(\"x:\", x)\n",
    "print(\"x.T:\", x.T)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Broadcasting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "z: [[ 6  8]\n",
      " [ 8 10]]\n"
     ]
    }
   ],
   "source": [
    "x = np.array([[1,2], [3,4]])\n",
    "y = np.array([5, 6])\n",
    "\n",
    "# y has shape (2,) and is added to every row of x, which has shape (2, 2)\n",
    "z = x + y\n",
    "print(\"z:\", z)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reshaping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2]\n",
      " [3 4]\n",
      " [5 6]]\n",
      "x.shape:  (3, 2)\n",
      "y.shape:  (2, 3)\n",
      "y: [[1 2 3]\n",
      " [4 5 6]]\n"
     ]
    }
   ],
   "source": [
    "x = np.array([[1,2], [3,4], [5,6]])\n",
    "print(x)\n",
    "print(\"x.shape: \", x.shape)\n",
    "\n",
    "# Same six elements in the same row-major order, regrouped into 2 rows of 3\n",
    "y = np.reshape(x, (2, 3))\n",
    "print(\"y.shape: \", y.shape)\n",
    "print(\"y:\", y)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}