{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# NumPy\n", "\n", "Credits: Forked from [Parallel Machine Learning with scikit-learn and IPython](https://github.com/ogrisel/parallel_ml_tutorial) by Olivier Grisel\n", "\n", "* NumPy Arrays, dtype, and shape\n", "* Common Array Operations\n", "* Reshape and Update In-Place\n", "* Combine Arrays\n", "* Create Sample Data" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## NumPy Arrays, dtypes, and shapes" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1 2 3]\n", "(3,)\n", "int64\n" ] } ], "source": [ "a = np.array([1, 2, 3])\n", "print(a)\n", "print(a.shape)\n", "print(a.dtype)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[0 2 4]\n", " [1 3 5]]\n", "(2, 3)\n", "int64\n" ] } ], "source": [ "b = np.array([[0, 2, 4], [1, 3, 5]])\n", "print(b)\n", "print(b.shape)\n", "print(b.dtype)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0., 0., 0., 0., 0.])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.zeros(5)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 1, 1, 1],\n", " [1, 1, 1, 1],\n", " [1, 1, 1, 1]], dtype=int32)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.ones(shape=(3, 4), dtype=np.int32)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Common Array Operations" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[0. 1. 2. 
]\n", " [0.5 1.5 2.5]]\n", "(2, 3)\n", "float64\n" ] } ], "source": [ "c = b * 0.5\n", "print(c)\n", "print(c.shape)\n", "print(c.dtype)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[1. 3. 5. ]\n", " [1.5 3.5 5.5]]\n" ] } ], "source": [ "d = a + c\n", "print(d)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1., 3., 5.])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d[0]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.0" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d[0, 0]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1. , 1.5])" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d[:, 0]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "19.5" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.sum()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.25" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.mean()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 2.5, 6.5, 10.5])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.sum(axis=0)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3. 
, 3.5])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.mean(axis=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reshape and Update In-Place" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 0 1 2 3 4 5 6 7 8 9 10 11]\n" ] } ], "source": [ "e = np.arange(12)\n", "print(e)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 0 1 2 3]\n", " [ 4 5 6 7]\n", " [ 8 9 10 11]]\n" ] } ], "source": [ "# f is a view of contents of e\n", "f = e.reshape(3, 4)\n", "print(f)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 1 2 3 4 0 0 0 0 0 0 0]\n" ] } ], "source": [ "# Set values of e from index 5 onwards to 0\n", "e[5:] = 0\n", "print(e)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0, 1, 2, 3],\n", " [4, 0, 0, 0],\n", " [0, 0, 0, 0]])" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# f is also updated\n", "f" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " C_CONTIGUOUS : True\n", " F_CONTIGUOUS : False\n", " OWNDATA : False\n", " WRITEABLE : True\n", " ALIGNED : True\n", " WRITEBACKIFCOPY : False\n", " UPDATEIFCOPY : False" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# OWNDATA shows f does not own its data\n", "f.flags" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combine Arrays" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 2, 3])" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a" ] 
}, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0, 2, 4],\n", " [1, 3, 5]])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1. , 3. , 5. ],\n", " [1.5, 3.5, 5.5]])" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 2, 3, 1, 2, 3, 1, 2, 3])" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.concatenate([a, a, a])" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1. , 2. , 3. ],\n", " [0. , 2. , 4. ],\n", " [1. , 3. , 5. ],\n", " [1. , 3. , 5. ],\n", " [1.5, 3.5, 5.5]])" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# vstack treats the 1-D array a as a single row of shape (1, 3), and the\n", "# integer inputs are upcast to float64 to match d\n", "np.vstack([a, b, d])" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0. , 2. , 4. , 1. , 3. , 5. ],\n", " [1. , 3. , 5. , 1.5, 3.5, 5.5]])" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# In machine learning, hstack is useful for enriching a dataset by\n", "# concatenating new feature columns onto the existing ones\n", "np.hstack([b, d])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create Sample Data" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "import pylab as plt\n", "import seaborn\n", "\n", "seaborn.set()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Create 10 evenly spaced numbers over the interval [0, 2]\n", "x = np.linspace(0, 2, 10)\n", "plt.plot(x, 'o-');\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Create sample data: y = log(x) plus normal noise (std 0.3)\n", "x = np.random.uniform(1, 100, 1000)\n", "y = np.log(x) + np.random.normal(0, .3, 1000)\n", "\n", "plt.scatter(x, y)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Softmax Function Explanation - Super basics" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 5],\n", " [ 2],\n", " [-1],\n", " [ 3]])" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "ZL = np.array([[5],[2],[-1],[3]])\n", "ZL" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[148.4131591 ],\n", " [ 7.3890561 ],\n", " [ 0.36787944],\n", " [ 20.08553692]])" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t = np.exp(ZL)\n", "t" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "176.25563156586637" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# To normalize these entries, first add them up: this is the sum of the t_i\n", "np.sum(t)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.84203357],\n", " [0.04192238],\n", " [0.00208719],\n", " [0.11395685]])" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "AL = t / np.sum(t)\n", "AL" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python",
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 1 }