{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### New to Plotly?\n",
"Plotly's Python library is free and open source! [Get started](https://plotly.com/python/getting-started/) by downloading the client and [reading the primer](https://plotly.com/python/getting-started/).\n",
"\n",
"You can set up Plotly to work in [online](https://plotly.com/python/getting-started/#initialization-for-online-plotting) or [offline](https://plotly.com/python/getting-started/#initialization-for-offline-plotting) mode, or in [jupyter notebooks](https://plotly.com/python/getting-started/#start-plotting-online).\n",
"\n",
"We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Version Check\n",
"Note: Scatterplot Matrix is available in version 1.9.11+\n",
"\n",
"Run `pip install plotly --upgrade` to update your Plotly version"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2.4.1'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly\n",
"plotly.__version__"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Basic Scatterplot Matrix"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"import plotly.figure_factory as ff\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Ten standard-normal draws for each of two labelled columns.\n",
"samples = np.random.randn(10, 2)\n",
"dataframe = pd.DataFrame(samples, columns=['Column A', 'Column B'])\n",
"\n",
"# Only the canvas size is overridden; every other option keeps its default.\n",
"fig = ff.create_scatterplotmatrix(\n",
"    dataframe,\n",
"    height=800,\n",
"    width=800\n",
")\n",
"py.iplot(fig, filename='Basic Scatterplot Matrix')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Index a Column"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"import plotly.figure_factory as ff\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Two numeric columns of standard-normal noise, ten rows each.\n",
"samples = np.random.randn(10, 2)\n",
"dataframe = pd.DataFrame(samples, columns=['Column A', 'Column B'])\n",
"\n",
"# One label per row; the 'Fruit' column is passed as the index below.\n",
"fruit_labels = [\n",
"    'apple', 'apple', 'grape', 'apple', 'apple',\n",
"    'grape', 'pear', 'pear', 'apple', 'pear'\n",
"]\n",
"dataframe['Fruit'] = pd.Series(fruit_labels)\n",
"\n",
"fig = ff.create_scatterplotmatrix(\n",
"    dataframe,\n",
"    index='Fruit',\n",
"    size=10,\n",
"    height=800,\n",
"    width=800\n",
")\n",
"py.iplot(fig, filename='Index a Column')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Box Plots along Diagonal"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"import plotly.figure_factory as ff\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Four numeric columns of standard-normal noise, ten rows each.\n",
"column_names = ['Column A', 'Column B', 'Column C', 'Column D']\n",
"dataframe = pd.DataFrame(np.random.randn(10, 4), columns=column_names)\n",
"\n",
"# One label per row, used as the index column.\n",
"fruit_labels = [\n",
"    'apple', 'apple', 'grape', 'apple', 'apple',\n",
"    'grape', 'pear', 'pear', 'apple', 'pear'\n",
"]\n",
"dataframe['Fruit'] = pd.Series(fruit_labels)\n",
"\n",
"# diag='box' selects box plots for the main-diagonal subplots.\n",
"fig = ff.create_scatterplotmatrix(\n",
"    dataframe,\n",
"    diag='box',\n",
"    index='Fruit',\n",
"    height=800,\n",
"    width=800\n",
")\n",
"py.iplot(fig, filename='Box plots along Diagonal Subplots')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Histograms along Diagonal"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"import plotly.figure_factory as ff\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Four numeric columns of standard-normal noise, twenty rows each.\n",
"column_names = ['Column A', 'Column B', 'Column C', 'Column D']\n",
"dataframe = pd.DataFrame(np.random.randn(20, 4), columns=column_names)\n",
"\n",
"# The twenty labels are the same ten-label sequence written out twice.\n",
"fruit_cycle = [\n",
"    'apple', 'apple', 'grape', 'apple', 'apple',\n",
"    'grape', 'pear', 'pear', 'apple', 'pear'\n",
"]\n",
"dataframe['Fruit'] = pd.Series(fruit_cycle * 2)\n",
"\n",
"# diag='histogram' selects histograms for the main-diagonal subplots.\n",
"fig = ff.create_scatterplotmatrix(\n",
"    dataframe,\n",
"    diag='histogram',\n",
"    index='Fruit',\n",
"    height=800,\n",
"    width=800\n",
")\n",
"py.iplot(fig, filename='Histograms along Diagonal Subplots')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Sequential Colormap"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"import plotly.figure_factory as ff\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# 100 rows of standard-normal noise in three labelled columns.\n",
"column_names = ['Column A', 'Column B', 'Column C']\n",
"dataframe = pd.DataFrame(np.random.randn(100, 3), columns=column_names)\n",
"\n",
"# Numeric index column, coloured with the built-in 'Blues' scale.\n",
"fig = ff.create_scatterplotmatrix(\n",
"    dataframe,\n",
"    diag='histogram',\n",
"    index='Column A',\n",
"    colormap='Blues',\n",
"    height=800,\n",
"    width=800\n",
")\n",
"py.iplot(fig, filename='Use a Sequential Colormap')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Custom Sequential Colormap"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"import plotly.figure_factory as ff\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# 100 rows of standard-normal noise in three labelled columns.\n",
"column_names = ['Column A', 'Column B', 'Column C']\n",
"dataframe = pd.DataFrame(np.random.randn(100, 3), columns=column_names)\n",
"\n",
"# Colours may mix rgb strings and hex codes. The function's docstring says\n",
"# that with colormap_type='seq' only the first two list entries are used.\n",
"custom_colors = ['rgb(100, 150, 255)', '#F0963C', 'rgb(51, 255, 153)']\n",
"\n",
"fig = ff.create_scatterplotmatrix(\n",
"    dataframe,\n",
"    diag='histogram',\n",
"    index='Column A',\n",
"    colormap=custom_colors,\n",
"    colormap_type='seq',\n",
"    height=800,\n",
"    width=800\n",
")\n",
"py.iplot(fig, filename='Custom Sequential Colormap')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Partition Numeric Data into Intervals"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"import plotly.figure_factory as ff\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# 100 rows of standard-normal noise in three labelled columns.\n",
"dataframe = pd.DataFrame(np.random.randn(100, 3),\n",
"                         columns=['Column A', 'Column B', 'Column C'])\n",
"\n",
"# endpts is an increasing sequence that splits the numeric 'Column A' index\n",
"# into intervals, so its values are grouped like categories.\n",
"fig = ff.create_scatterplotmatrix(dataframe, diag='box', index='Column A',\n",
"                                  colormap='Portland', colormap_type='seq',\n",
"                                  endpts=[-1, 0, 1],\n",
"                                  height=800, width=800, size=12,\n",
"                                  marker=dict(symbol=25))\n",
"py.iplot(fig, filename = 'Partition Numeric Data into Intervals')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Categorical Colormap"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"import plotly.graph_objs as go\n",
"import plotly.figure_factory as ff\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import random\n",
"\n",
"# 100 rows of standard-normal noise in two labelled columns.\n",
"dataframe = pd.DataFrame(np.random.randn(100, 2),\n",
"                         columns=['Column A', 'Column B'])\n",
"\n",
"# Draw one random fruit label per row. The count comes from the frame itself\n",
"# so the label list cannot drift out of step with the number of rows.\n",
"fruits = ['apple', 'blueberry', 'banana', 'orange', 'raspberry']\n",
"new_column = [random.choice(fruits) for _row in range(len(dataframe))]\n",
"dataframe['Fruits'] = pd.Series(new_column, index=dataframe.index)\n",
"\n",
"# Five colours in mixed notations: hex codes, 0-1 tuples and an rgb string.\n",
"fig = ff.create_scatterplotmatrix(dataframe, index='Fruits', diag='histogram',\n",
"                                  colormap= ['#d95f0e', (0.2, 0.6, 1), 'rgb(230,247,188)', '#bcbddc', (0.8, 0.7, 0.65)],\n",
"                                  colormap_type='cat',\n",
"                                  height=800, width=800,\n",
"                                  size=15, marker=dict(symbol='square-open'))\n",
"py.iplot(fig, filename = 'Use a Categorical Colormap')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Colormap as a Dictionary"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"import plotly.graph_objs as go\n",
"import plotly.figure_factory as ff\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import random\n",
"\n",
"# 100 rows of standard-normal noise in three labelled columns.\n",
"dataframe = pd.DataFrame(np.random.randn(100, 3),\n",
"                         columns=['Column A', 'Column B', 'Column C'])\n",
"\n",
"# Draw one random colour name per row. The count comes from the frame itself\n",
"# so the label list cannot drift out of step with the number of rows.\n",
"strange_colors = ['turquoise', 'limegreen', 'goldenrod']\n",
"new_column = [random.choice(strange_colors) for _row in range(len(dataframe))]\n",
"dataframe['Colors'] = pd.Series(new_column, index=dataframe.index)\n",
"\n",
"# With a dict colormap, every string in the index column must be a key.\n",
"fig = ff.create_scatterplotmatrix(dataframe, diag='box', index='Colors',\n",
"                                  colormap= dict(\n",
"                                      turquoise = '#00F5FF',\n",
"                                      limegreen = '#32CD32',\n",
"                                      goldenrod = '#DAA520'\n",
"                                  ),\n",
"                                  colormap_type='cat',\n",
"                                  height=800, width=800)\n",
"py.iplot(fig, filename = 'Colormap as a Dictionary')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Reference"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on function create_scatterplotmatrix in module plotly.figure_factory._scatterplot:\n",
"\n",
"create_scatterplotmatrix(df, index=None, endpts=None, diag='scatter', height=500, width=500, size=6, title='Scatterplot Matrix', colormap=None, colormap_type='cat', dataframe=None, headers=None, index_vals=None, **kwargs)\n",
" Returns data for a scatterplot matrix.\n",
" \n",
" :param (array) df: array of the data with column headers\n",
" :param (str) index: name of the index column in data array\n",
" :param (list|tuple) endpts: takes an increasing sequece of numbers\n",
" that defines intervals on the real line. They are used to group\n",
" the entries in an index of numbers into their corresponding\n",
" interval and therefore can be treated as categorical data\n",
" :param (str) diag: sets the chart type for the main diagonal plots.\n",
" The options are 'scatter', 'histogram' and 'box'.\n",
" :param (int|float) height: sets the height of the chart\n",
" :param (int|float) width: sets the width of the chart\n",
" :param (float) size: sets the marker size (in px)\n",
" :param (str) title: the title label of the scatterplot matrix\n",
" :param (str|tuple|list|dict) colormap: either a plotly scale name,\n",
" an rgb or hex color, a color tuple, a list of colors or a\n",
" dictionary. An rgb color is of the form 'rgb(x, y, z)' where\n",
" x, y and z belong to the interval [0, 255] and a color tuple is a\n",
" tuple of the form (a, b, c) where a, b and c belong to [0, 1].\n",
" If colormap is a list, it must contain valid color types as its\n",
" members.\n",
" If colormap is a dictionary, all the string entries in\n",
" the index column must be a key in colormap. In this case, the\n",
" colormap_type is forced to 'cat' or categorical\n",
" :param (str) colormap_type: determines how colormap is interpreted.\n",
" Valid choices are 'seq' (sequential) and 'cat' (categorical). If\n",
" 'seq' is selected, only the first two colors in colormap will be\n",
" considered (when colormap is a list) and the index values will be\n",
" linearly interpolated between those two colors. This option is\n",
" forced if all index values are numeric.\n",
" If 'cat' is selected, a color from colormap will be assigned to\n",
" each category from index, including the intervals if endpts is\n",
" being used\n",
" :param (dict) **kwargs: a dictionary of scatterplot arguments\n",
" The only forbidden parameters are 'size', 'color' and\n",
" 'colorscale' in 'marker'\n",
" \n",
" Example 1: Vanilla Scatterplot Matrix\n",
" ```\n",
" import plotly.plotly as py\n",
" from plotly.graph_objs import graph_objs\n",
" from plotly.figure_factory import create_scatterplotmatrix\n",
" \n",
" import numpy as np\n",
" import pandas as pd\n",
" \n",
" # Create dataframe\n",
" df = pd.DataFrame(np.random.randn(10, 2),\n",
" columns=['Column 1', 'Column 2'])\n",
" \n",
" # Create scatterplot matrix\n",
" fig = create_scatterplotmatrix(df)\n",
" \n",
" # Plot\n",
" py.iplot(fig, filename='Vanilla Scatterplot Matrix')\n",
" ```\n",
" \n",
" Example 2: Indexing a Column\n",
" ```\n",
" import plotly.plotly as py\n",
" from plotly.graph_objs import graph_objs\n",
" from plotly.figure_factory import create_scatterplotmatrix\n",
" \n",
" import numpy as np\n",
" import pandas as pd\n",
" \n",
" # Create dataframe with index\n",
" df = pd.DataFrame(np.random.randn(10, 2),\n",
" columns=['A', 'B'])\n",
" \n",
" # Add another column of strings to the dataframe\n",
" df['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',\n",
" 'grape', 'pear', 'pear', 'apple', 'pear'])\n",
" \n",
" # Create scatterplot matrix\n",
" fig = create_scatterplotmatrix(df, index='Fruit', size=10)\n",
" \n",
" # Plot\n",
" py.iplot(fig, filename = 'Scatterplot Matrix with Index')\n",
" ```\n",
" \n",
" Example 3: Styling the Diagonal Subplots\n",
" ```\n",
" import plotly.plotly as py\n",
" from plotly.graph_objs import graph_objs\n",
" from plotly.figure_factory import create_scatterplotmatrix\n",
" \n",
" import numpy as np\n",
" import pandas as pd\n",
" \n",
" # Create dataframe with index\n",
" df = pd.DataFrame(np.random.randn(10, 4),\n",
" columns=['A', 'B', 'C', 'D'])\n",
" \n",
" # Add another column of strings to the dataframe\n",
" df['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',\n",
" 'grape', 'pear', 'pear', 'apple', 'pear'])\n",
" \n",
" # Create scatterplot matrix\n",
" fig = create_scatterplotmatrix(df, diag='box', index='Fruit', height=1000,\n",
" width=1000)\n",
" \n",
" # Plot\n",
" py.iplot(fig, filename = 'Scatterplot Matrix - Diagonal Styling')\n",
" ```\n",
" \n",
" Example 4: Use a Theme to Style the Subplots\n",
" ```\n",
" import plotly.plotly as py\n",
" from plotly.graph_objs import graph_objs\n",
" from plotly.figure_factory import create_scatterplotmatrix\n",
" \n",
" import numpy as np\n",
" import pandas as pd\n",
" \n",
" # Create dataframe with random data\n",
" df = pd.DataFrame(np.random.randn(100, 3),\n",
" columns=['A', 'B', 'C'])\n",
" \n",
" # Create scatterplot matrix using a built-in\n",
" # Plotly palette scale and indexing column 'A'\n",
" fig = create_scatterplotmatrix(df, diag='histogram', index='A',\n",
" colormap='Blues', height=800, width=800)\n",
" \n",
" # Plot\n",
" py.iplot(fig, filename = 'Scatterplot Matrix - Colormap Theme')\n",
" ```\n",
" \n",
" Example 5: Example 4 with Interval Factoring\n",
" ```\n",
" import plotly.plotly as py\n",
" from plotly.graph_objs import graph_objs\n",
" from plotly.figure_factory import create_scatterplotmatrix\n",
" \n",
" import numpy as np\n",
" import pandas as pd\n",
" \n",
" # Create dataframe with random data\n",
" df = pd.DataFrame(np.random.randn(100, 3),\n",
" columns=['A', 'B', 'C'])\n",
" \n",
" # Create scatterplot matrix using a list of 2 rgb tuples\n",
" # and endpoints at -1, 0 and 1\n",
" fig = create_scatterplotmatrix(df, diag='histogram', index='A',\n",
" colormap=['rgb(140, 255, 50)',\n",
" 'rgb(170, 60, 115)', '#6c4774',\n",
" (0.5, 0.1, 0.8)],\n",
" endpts=[-1, 0, 1], height=800, width=800)\n",
" \n",
" # Plot\n",
" py.iplot(fig, filename = 'Scatterplot Matrix - Intervals')\n",
" ```\n",
" \n",
" Example 6: Using the colormap as a Dictionary\n",
" ```\n",
" import plotly.plotly as py\n",
" from plotly.graph_objs import graph_objs\n",
" from plotly.figure_factory import create_scatterplotmatrix\n",
" \n",
" import numpy as np\n",
" import pandas as pd\n",
" import random\n",
" \n",
" # Create dataframe with random data\n",
" df = pd.DataFrame(np.random.randn(100, 3),\n",
" columns=['Column A',\n",
" 'Column B',\n",
" 'Column C'])\n",
" \n",
" # Add new color column to dataframe\n",
" new_column = []\n",
" strange_colors = ['turquoise', 'limegreen', 'goldenrod']\n",
" \n",
" for j in range(100):\n",
" new_column.append(random.choice(strange_colors))\n",
" df['Colors'] = pd.Series(new_column, index=df.index)\n",
" \n",
" # Create scatterplot matrix using a dictionary of hex color values\n",
" # which correspond to actual color names in 'Colors' column\n",
" fig = create_scatterplotmatrix(\n",
" df, diag='box', index='Colors',\n",
" colormap= dict(\n",
" turquoise = '#00F5FF',\n",
" limegreen = '#32CD32',\n",
" goldenrod = '#DAA520'\n",
" ),\n",
" colormap_type='cat',\n",
" height=800, width=800\n",
" )\n",
" \n",
" # Plot\n",
" py.iplot(fig, filename = 'Scatterplot Matrix - colormap dictionary ')\n",
" ```\n",
"\n"
]
}
],
"source": [
"import plotly.figure_factory as ff\n",
"\n",
"# Print the function's docstring: its parameters and bundled examples.\n",
"help(ff.create_scatterplotmatrix)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting git+https://github.com/plotly/publisher.git\n",
" Cloning https://github.com/plotly/publisher.git to /private/var/folders/tc/bs9g6vrd36q74m5t8h9cgphh0000gn/T/pip-req-build-Yu7QIi\n",
"Building wheels for collected packages: publisher\n",
" Running setup.py bdist_wheel for publisher ... \u001b[?25ldone\n",
"\u001b[?25h Stored in directory: /private/var/folders/tc/bs9g6vrd36q74m5t8h9cgphh0000gn/T/pip-ephem-wheel-cache-PkVAA1/wheels/99/3e/a0/fbd22ba24cca72bdbaba53dbc23c1768755fb17b3af0f33966\n",
"Successfully built publisher\n",
"Installing collected packages: publisher\n",
" Found existing installation: publisher 0.11\n",
" Uninstalling publisher-0.11:\n",
" Successfully uninstalled publisher-0.11\n",
"Successfully installed publisher-0.11\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/IPython/nbconvert.py:13: ShimWarning: The `IPython.nbconvert` package has been deprecated since IPython 4.0. You should import from nbconvert instead.\n",
" \"You should import from nbconvert instead.\", ShimWarning)\n",
"/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/publisher/publisher.py:53: UserWarning: Did you \"Save\" this notebook before running this command? Remember to save, always save.\n",
" warnings.warn('Did you \"Save\" this notebook before running this command? '\n",
"/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/publisher/publisher.py:58: UserWarning: Your URL has more than 2 parts... are you sure?\n",
" warnings.warn('Your URL has more than 2 parts... are you sure?')\n"
]
}
],
"source": [
"from IPython.display import display, HTML\n",
"\n",
"# Two HTML snippets are displayed first; their markup is empty in this copy.\n",
"display(HTML(''))\n",
"display(HTML(''))\n",
"\n",
"# Reinstall the publisher helper from GitHub, then call its publish()\n",
"# with this page's metadata.\n",
"! pip install git+https://github.com/plotly/publisher.git --upgrade\n",
"import publisher\n",
"\n",
"publisher.publish(\n",
"    'scatterplot-matrix.ipynb',\n",
"    'python/legacy/scatterplot-matrix/',\n",
"    'Scatterplot Matrix',\n",
"    'How to make scatterplot-matrix plots in Python with Plotly.',\n",
"    title='Python Scatterplot Matrix | plotly',\n",
"    name='Scatterplot Matrix',\n",
"    has_thumbnail='true',\n",
"    thumbnail='thumbnail/scatterplot-matrix.jpg',\n",
"    language='python',\n",
"    display_as='legacy_charts',\n",
"    order=10.1,\n",
"    ipynb='~notebook_demo/27'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}