{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "#### New to Plotly?\n", "Plotly's Python library is free and open source! [Get started](https://plotly.com/python/getting-started/) by downloading the client and [reading the primer](https://plotly.com/python/getting-started/).\n", "
You can set up Plotly to work in [online](https://plotly.com/python/getting-started/#initialization-for-online-plotting) or [offline](https://plotly.com/python/getting-started/#initialization-for-offline-plotting) mode, or in [Jupyter notebooks](https://plotly.com/python/getting-started/#start-plotting-online).\n", "
We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Version Check\n", "Note: Scatterplot Matrix is available in version 1.9.11+
\n", "Run `pip install plotly --upgrade` to update your Plotly version" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2.4.1'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly\n", "plotly.__version__" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Basic Scatterplot Matrix" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly.plotly as py\n", "import plotly.figure_factory as ff\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "dataframe = pd.DataFrame(np.random.randn(10, 2),\n", " columns=['Column A', 'Column B'])\n", "\n", "fig = ff.create_scatterplotmatrix(dataframe, height=800, width=800)\n", "py.iplot(fig, filename='Basic Scatterplot Matrix')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Index a Column" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly.plotly as py\n", "import plotly.figure_factory as ff\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "dataframe = pd.DataFrame(np.random.randn(10, 2),\n", " columns=['Column A', 'Column B'])\n", "\n", "dataframe['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',\n", " 'grape', 'pear', 'pear', 'apple', 'pear'])\n", "\n", "fig = ff.create_scatterplotmatrix(dataframe, index='Fruit', size=10, height=800, width=800)\n", "py.iplot(fig, filename = 'Index a Column')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Box Plots along Diagonal" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { 
"text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly.plotly as py\n", "import plotly.figure_factory as ff\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "dataframe = pd.DataFrame(np.random.randn(10, 4),\n", " columns=['Column A', 'Column B', 'Column C', 'Column D'])\n", "\n", "dataframe['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',\n", " 'grape', 'pear', 'pear', 'apple', 'pear'])\n", "\n", "\n", "fig = ff.create_scatterplotmatrix(dataframe, diag='box', index='Fruit',\n", " height=800, width=800)\n", "py.iplot(fig, filename='Box plots along Diagonal Subplots')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Histograms along Diagonal" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly.plotly as py\n", "import plotly.figure_factory as ff\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "dataframe = pd.DataFrame(np.random.randn(20, 4),\n", " columns=['Column A', 'Column B', 'Column C', 'Column D'])\n", "\n", "dataframe['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',\n", " 'grape', 'pear', 'pear', 'apple', 'pear',\n", " 'apple', 'apple', 'grape', 'apple', 'apple',\n", " 'grape', 'pear', 'pear', 'apple', 'pear'])\n", "\n", "\n", "fig = ff.create_scatterplotmatrix(dataframe, diag='histogram', index='Fruit',\n", " height=800, width=800)\n", "py.iplot(fig, filename='Histograms along Diagonal Subplots')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Sequential Colormap" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"import plotly.plotly as py\n", "import plotly.figure_factory as ff\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "dataframe = pd.DataFrame(np.random.randn(100, 3),\n", " columns=['Column A', 'Column B', 'Column C'])\n", "\n", "fig = ff.create_scatterplotmatrix(dataframe, diag='histogram',index='Column A',\n", " colormap='Blues', height=800, width=800)\n", "py.iplot(fig, filename = 'Use a Sequential Colormap')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Custom Sequential Colormap" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly.plotly as py\n", "import plotly.figure_factory as ff\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "dataframe = pd.DataFrame(np.random.randn(100, 3),\n", " columns=['Column A', 'Column B', 'Column C'])\n", "\n", "fig = ff.create_scatterplotmatrix(dataframe, diag='histogram', index='Column A',\n", " colormap=['rgb(100, 150, 255)', '#F0963C', 'rgb(51, 255, 153)'],\n", " colormap_type='seq', height=800, width=800)\n", "py.iplot(fig, filename = 'Custom Sequential Colormap')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Partition Numeric Data into Intervals" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly.plotly as py\n", "import plotly.figure_factory as FF\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "dataframe = pd.DataFrame(np.random.randn(100, 3),\n", " columns=['Column A', 'Column B', 'Column C'])\n", "\n", "fig = FF.create_scatterplotmatrix(dataframe, diag='box', index='Column A',\n", " colormap='Portland', colormap_type='seq',\n", " endpts=[-1, 0, 1],\n", " 
height=800, width=800, size=12,\n", " marker=dict(symbol=25))\n", "py.iplot(fig, filename = 'Partition Numeric Data into Intervals')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Categorical Colormap" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly.plotly as py\n", "import plotly.graph_objs as go\n", "import plotly.figure_factory as ff\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import random\n", "\n", "dataframe = pd.DataFrame(np.random.randn(100, 2),\n", " columns=['Column A', 'Column B'])\n", "\n", "new_column = []\n", "fruits = ['apple', 'blueberry', 'banana', 'orange', 'raspberry']\n", "for j in range(100):\n", " new_column.append(random.choice(fruits))\n", "dataframe['Fruits'] = pd.Series(new_column, index=dataframe.index)\n", "\n", "fig = ff.create_scatterplotmatrix(dataframe, index='Fruits', diag='histogram',\n", " colormap= ['#d95f0e', (0.2, 0.6, 1), 'rgb(230,247,188)', '#bcbddc', (0.8, 0.7, 0.65)],\n", " colormap_type='cat',\n", " height=800, width=800,\n", " size=15, marker=dict(symbol='square-open'))\n", "py.iplot(fig, filename = 'Use a Categorical Colormap')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Colormap as a Dictionary" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly.plotly as py\n", "import plotly.graph_objs as go\n", "import plotly.figure_factory as ff\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import random\n", "\n", "dataframe = pd.DataFrame(np.random.randn(100, 3),\n", " columns=['Column A', 'Column B', 'Column C'])\n", "\n", "new_column = []\n", "strange_colors = ['turquoise', 'limegreen', 
'goldenrod']\n", "for j in range(100):\n", " new_column.append(random.choice(strange_colors))\n", "dataframe['Colors'] = pd.Series(new_column, index=dataframe.index)\n", "\n", "fig = ff.create_scatterplotmatrix(dataframe, diag='box', index='Colors',\n", " colormap= dict(\n", " turquoise = '#00F5FF',\n", " limegreen = '#32CD32',\n", " goldenrod = '#DAA520'\n", " ),\n", " colormap_type='cat',\n", " height=800, width=800)\n", "py.iplot(fig, filename = 'Colormap as a Dictionary')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Reference" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Help on function create_scatterplotmatrix in module plotly.figure_factory._scatterplot:\n", "\n", "create_scatterplotmatrix(df, index=None, endpts=None, diag='scatter', height=500, width=500, size=6, title='Scatterplot Matrix', colormap=None, colormap_type='cat', dataframe=None, headers=None, index_vals=None, **kwargs)\n", " Returns data for a scatterplot matrix.\n", " \n", " :param (array) df: array of the data with column headers\n", " :param (str) index: name of the index column in data array\n", " :param (list|tuple) endpts: takes an increasing sequece of numbers\n", " that defines intervals on the real line. They are used to group\n", " the entries in an index of numbers into their corresponding\n", " interval and therefore can be treated as categorical data\n", " :param (str) diag: sets the chart type for the main diagonal plots.\n", " The options are 'scatter', 'histogram' and 'box'.\n", " :param (int|float) height: sets the height of the chart\n", " :param (int|float) width: sets the width of the chart\n", " :param (float) size: sets the marker size (in px)\n", " :param (str) title: the title label of the scatterplot matrix\n", " :param (str|tuple|list|dict) colormap: either a plotly scale name,\n", " an rgb or hex color, a color tuple, a list of colors or a\n", " dictionary. 
An rgb color is of the form 'rgb(x, y, z)' where\n", " x, y and z belong to the interval [0, 255] and a color tuple is a\n", " tuple of the form (a, b, c) where a, b and c belong to [0, 1].\n", " If colormap is a list, it must contain valid color types as its\n", " members.\n", " If colormap is a dictionary, all the string entries in\n", " the index column must be a key in colormap. In this case, the\n", " colormap_type is forced to 'cat' or categorical\n", " :param (str) colormap_type: determines how colormap is interpreted.\n", " Valid choices are 'seq' (sequential) and 'cat' (categorical). If\n", " 'seq' is selected, only the first two colors in colormap will be\n", " considered (when colormap is a list) and the index values will be\n", " linearly interpolated between those two colors. This option is\n", " forced if all index values are numeric.\n", " If 'cat' is selected, a color from colormap will be assigned to\n", " each category from index, including the intervals if endpts is\n", " being used\n", " :param (dict) **kwargs: a dictionary of scatterplot arguments\n", " The only forbidden parameters are 'size', 'color' and\n", " 'colorscale' in 'marker'\n", " \n", " Example 1: Vanilla Scatterplot Matrix\n", " ```\n", " import plotly.plotly as py\n", " from plotly.graph_objs import graph_objs\n", " from plotly.figure_factory import create_scatterplotmatrix\n", " \n", " import numpy as np\n", " import pandas as pd\n", " \n", " # Create dataframe\n", " df = pd.DataFrame(np.random.randn(10, 2),\n", " columns=['Column 1', 'Column 2'])\n", " \n", " # Create scatterplot matrix\n", " fig = create_scatterplotmatrix(df)\n", " \n", " # Plot\n", " py.iplot(fig, filename='Vanilla Scatterplot Matrix')\n", " ```\n", " \n", " Example 2: Indexing a Column\n", " ```\n", " import plotly.plotly as py\n", " from plotly.graph_objs import graph_objs\n", " from plotly.figure_factory import create_scatterplotmatrix\n", " \n", " import numpy as np\n", " import pandas as pd\n", " \n", " 
# Create dataframe with index\n", " df = pd.DataFrame(np.random.randn(10, 2),\n", " columns=['A', 'B'])\n", " \n", " # Add another column of strings to the dataframe\n", " df['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',\n", " 'grape', 'pear', 'pear', 'apple', 'pear'])\n", " \n", " # Create scatterplot matrix\n", " fig = create_scatterplotmatrix(df, index='Fruit', size=10)\n", " \n", " # Plot\n", " py.iplot(fig, filename = 'Scatterplot Matrix with Index')\n", " ```\n", " \n", " Example 3: Styling the Diagonal Subplots\n", " ```\n", " import plotly.plotly as py\n", " from plotly.graph_objs import graph_objs\n", " from plotly.figure_factory import create_scatterplotmatrix\n", " \n", " import numpy as np\n", " import pandas as pd\n", " \n", " # Create dataframe with index\n", " df = pd.DataFrame(np.random.randn(10, 4),\n", " columns=['A', 'B', 'C', 'D'])\n", " \n", " # Add another column of strings to the dataframe\n", " df['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',\n", " 'grape', 'pear', 'pear', 'apple', 'pear'])\n", " \n", " # Create scatterplot matrix\n", " fig = create_scatterplotmatrix(df, diag='box', index='Fruit', height=1000,\n", " width=1000)\n", " \n", " # Plot\n", " py.iplot(fig, filename = 'Scatterplot Matrix - Diagonal Styling')\n", " ```\n", " \n", " Example 4: Use a Theme to Style the Subplots\n", " ```\n", " import plotly.plotly as py\n", " from plotly.graph_objs import graph_objs\n", " from plotly.figure_factory import create_scatterplotmatrix\n", " \n", " import numpy as np\n", " import pandas as pd\n", " \n", " # Create dataframe with random data\n", " df = pd.DataFrame(np.random.randn(100, 3),\n", " columns=['A', 'B', 'C'])\n", " \n", " # Create scatterplot matrix using a built-in\n", " # Plotly palette scale and indexing column 'A'\n", " fig = create_scatterplotmatrix(df, diag='histogram', index='A',\n", " colormap='Blues', height=800, width=800)\n", " \n", " # Plot\n", " py.iplot(fig, filename = 
'Scatterplot Matrix - Colormap Theme')\n", " ```\n", " \n", " Example 5: Example 4 with Interval Factoring\n", " ```\n", " import plotly.plotly as py\n", " from plotly.graph_objs import graph_objs\n", " from plotly.figure_factory import create_scatterplotmatrix\n", " \n", " import numpy as np\n", " import pandas as pd\n", " \n", " # Create dataframe with random data\n", " df = pd.DataFrame(np.random.randn(100, 3),\n", " columns=['A', 'B', 'C'])\n", " \n", " # Create scatterplot matrix using a list of 2 rgb tuples\n", " # and endpoints at -1, 0 and 1\n", " fig = create_scatterplotmatrix(df, diag='histogram', index='A',\n", " colormap=['rgb(140, 255, 50)',\n", " 'rgb(170, 60, 115)', '#6c4774',\n", " (0.5, 0.1, 0.8)],\n", " endpts=[-1, 0, 1], height=800, width=800)\n", " \n", " # Plot\n", " py.iplot(fig, filename = 'Scatterplot Matrix - Intervals')\n", " ```\n", " \n", " Example 6: Using the colormap as a Dictionary\n", " ```\n", " import plotly.plotly as py\n", " from plotly.graph_objs import graph_objs\n", " from plotly.figure_factory import create_scatterplotmatrix\n", " \n", " import numpy as np\n", " import pandas as pd\n", " import random\n", " \n", " # Create dataframe with random data\n", " df = pd.DataFrame(np.random.randn(100, 3),\n", " columns=['Column A',\n", " 'Column B',\n", " 'Column C'])\n", " \n", " # Add new color column to dataframe\n", " new_column = []\n", " strange_colors = ['turquoise', 'limegreen', 'goldenrod']\n", " \n", " for j in range(100):\n", " new_column.append(random.choice(strange_colors))\n", " df['Colors'] = pd.Series(new_column, index=df.index)\n", " \n", " # Create scatterplot matrix using a dictionary of hex color values\n", " # which correspond to actual color names in 'Colors' column\n", " fig = create_scatterplotmatrix(\n", " df, diag='box', index='Colors',\n", " colormap= dict(\n", " turquoise = '#00F5FF',\n", " limegreen = '#32CD32',\n", " goldenrod = '#DAA520'\n", " ),\n", " colormap_type='cat',\n", " height=800, 
width=800\n", " )\n", " \n", " # Plot\n", " py.iplot(fig, filename = 'Scatterplot Matrix - colormap dictionary ')\n", " ```\n", "\n" ] } ], "source": [ "help(ff.create_scatterplotmatrix)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Collecting git+https://github.com/plotly/publisher.git\n", " Cloning https://github.com/plotly/publisher.git to /private/var/folders/tc/bs9g6vrd36q74m5t8h9cgphh0000gn/T/pip-req-build-Yu7QIi\n", "Building wheels for collected packages: publisher\n", " Running setup.py bdist_wheel for publisher ... \u001b[?25ldone\n", "\u001b[?25h Stored in directory: /private/var/folders/tc/bs9g6vrd36q74m5t8h9cgphh0000gn/T/pip-ephem-wheel-cache-PkVAA1/wheels/99/3e/a0/fbd22ba24cca72bdbaba53dbc23c1768755fb17b3af0f33966\n", "Successfully built publisher\n", "Installing collected packages: publisher\n", " Found existing installation: publisher 0.11\n", " Uninstalling publisher-0.11:\n", " Successfully uninstalled publisher-0.11\n", "Successfully installed publisher-0.11\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/IPython/nbconvert.py:13: ShimWarning: The `IPython.nbconvert` package has been deprecated since IPython 4.0. You should import from nbconvert instead.\n", " \"You should import from nbconvert instead.\", ShimWarning)\n", "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/publisher/publisher.py:53: UserWarning: Did you \"Save\" this notebook before running this command? Remember to save, always save.\n", " warnings.warn('Did you \"Save\" this notebook before running this command? 
'\n", "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/publisher/publisher.py:58: UserWarning: Your URL has more than 2 parts... are you sure?\n", " warnings.warn('Your URL has more than 2 parts... are you sure?')\n" ] } ], "source": [ "from IPython.display import display, HTML\n", "\n", "display(HTML(''))\n", "display(HTML(''))\n", "\n", "! pip install git+https://github.com/plotly/publisher.git --upgrade\n", "import publisher\n", "publisher.publish(\n", " 'scatterplot-matrix.ipynb', 'python/legacy/scatterplot-matrix/', 'Scatterplot Matrix',\n", " 'How to make scatterplot-matrix plots in Python with Plotly.',\n", " title = 'Python Scatterplot Matrix | plotly',\n", " name = 'Scatterplot Matrix',\n", " has_thumbnail='true', thumbnail='thumbnail/scatterplot-matrix.jpg',\n", " language='python', display_as='legacy_charts', order=10.1,\n", " ipynb= '~notebook_demo/27')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 1 }