# Cars dataset exploration with plotly.py version 3.0

In [1]:
# Apply hack to hide tracebacks for presentation
import hide_ipython_tbs

## Load cars dataset

In [2]:
import pandas as pd
import numpy as np

# Only these columns of the CSV are used in this notebook.
CAR_COLUMNS = ['City mpg', 'Fuel Type', 'Horsepower',
               'Model Year', 'Torque', 'Hybrid', 'ID']

cars_df = pd.read_csv('data/cars/cars.csv', usecols=CAR_COLUMNS)

# Display five random rows as a quick look at the data.
cars_df.sample(5)

Unnamed: 0,City mpg,Fuel Type,Horsepower,Hybrid,ID,Model Year,Torque
2544,17,Gasoline,280,False,2010 Buick Lacrosse CXS,2010 Buick Lacrosse,259
2028,19,Gasoline,218,False,2011 Infiniti G25x AWD,2011 Infiniti G25,187
1994,12,Gasoline,310,False,2010 Ford Expedition Eddie Bauer EL 4WD,2010 Ford Expedition,365
4124,13,Gasoline,317,False,2012 Nissan Titan King Cab SV,2012 Nissan Titan,385
1558,17,Gasoline,230,False,2011 BMW 328i Sports Wagon,2011 BMW 3 Series Sports Wagon,200


In [3]:
cars_df.shape

(5076, 7)

## Load images of cars

In [4]:
import os

# Map each image file's base name (e.g. '2012_Chevrolet_Camaro_Coupe')
# to the raw bytes of that file.
IMAGE_DIR = 'data/cars/images'

image_data = {}
for img_filename in os.listdir(IMAGE_DIR):
    # splitext removes only the final extension, so a base name that itself
    # contains a '.' is kept whole instead of being cut at the first dot.
    model_year, _ext = os.path.splitext(img_filename)
    with open(os.path.join(IMAGE_DIR, img_filename), "rb") as f:
        image_data[model_year] = f.read()

In [5]:
from ipywidgets import Image
Image(value=image_data['2012_Chevrolet_Camaro_Coupe'])

Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x08\x06\x0…

## Construct plotly.py Figure Widget
Torque vs. MPG Scatter Trace

In [6]:
import plotly.graph_objs as go

In [7]:
# Trace description as a plain dict: a 'scattergl' trace in marker mode,
# with torque on x and city MPG on y.
torque_mpg_trace = {
    'type': 'scattergl',
    'x': cars_df['Torque'],
    'y': cars_df['City mpg'],
    'mode': 'markers',
}

fig = go.FigureWidget(data=[torque_mpg_trace])

### Display Figure
Previously, displaying a figure required online or offline `iplot`. That is still supported, but it is not needed with `FigureWidget`.

In [8]:
fig

FigureWidget({
    'data': [{'mode': 'markers',
              'type': 'scattergl',
              'uid': 'e3801…

### Label Figure
Use property assignment syntax to:

Set `fig.layout.title` to `'Torque and Fuel Efficiency'`

In [9]:
# Title displayed on the figure.
fig.layout.title = 'Torque and Fuel Efficiency'

Check default font size

In [10]:
fig.layout.titlefont.size

Increase the title font size

In [11]:
fig.layout.titlefont.size = 22

Set `fig.layout.titlefont.family` to `'Rockwell'`

In [12]:
fig.layout.titlefont.family = 'Rockwell'

### Create New View for Figure
If working in JupyterLab, right-click on blue bar to the left of the figure and select "Create New View for Output". Drag view to the right half of the screen.

### Label Axes

Set the `fig.layout.xaxis.title` property to `'Torque (foot-pounds)'`

In [13]:
fig.layout.xaxis.title = 'Torque (foot-pounds)'

Set the `fig.layout.yaxis.title` property to `'City MPG'`

In [14]:
fig.layout.yaxis.title = 'City MPG'

### Notice Quantization
Zoom in and notice that the dataset is quantized

### Apply Jitter

In [15]:
scatter = fig.data[0]
scatter

Scattergl({
    'mode': 'markers',
    'uid': 'e3801b38-a0d6-11e8-a63a-645aede86e5b',
    'x': array([236, 207, 207, ..., 398, 330, 330], dtype=int32),
    'y': array([18, 22, 21, ..., 12, 17, 17], dtype=int32)
})

In [16]:
N = len(cars_df)

# Seeded generator so the jittered positions are the same on every run.
rng = np.random.RandomState(0)

# Jitter is added to the original columns (the values the trace was built
# from) rather than to scatter.x / scatter.y, so re-running this cell does
# not stack more noise onto points that were already jittered.
scatter.x = cars_df['Torque'].values + rng.rand(N) * 10
scatter.y = cars_df['City mpg'].values + rng.rand(N) * 1

Zoom level did not reset! The plot is updated in place; it is not recreated each time a property changes.

### Address Overplotting

Lower marker opacity

In [17]:
scatter.marker.opacity = 0.2

Decrease marker size

In [18]:
scatter.marker.size = 4

### Aside on validation

What if I thought opacity ranged from 0 to 255?

In [19]:
# scatter.marker.opacity = 50

What if I forgot the name of an enumeration value?

In [20]:
# fig.layout.hovermode = 'nearest' # Set to 'closest'
fig.layout.hovermode = 'closest'

What if I don't know how to spell 'fuchsia'?

In [21]:
scatter.marker.color = 'fuchsia' # Set to 'fuchsia'

Restore default marker color

In [22]:
scatter.marker.color = None

### Add density contour

Add a smoothed density contour trace (`histogram2dcontour`) based on the `x=scatter.x` and `y=scatter.y` values.

In [23]:
contour = fig.add_histogram2dcontour(
    x=scatter.x, y=scatter.y)

Set contour colorscale

In [24]:
contour.colorscale = 'Hot'

Reverse the colorscale

In [25]:
contour.reversescale = True

Disable tooltips for contour

In [26]:
contour.hoverinfo = 'skip'

Tweak marker size and opacity

In [27]:
scatter.marker.opacity = .1
scatter.marker.size = 3

### Create marker configuration widget

Define function that inputs `opacity` and `size` and updates the figure.

In [28]:
def set_opacity(opacity, size):
    """Apply the given marker opacity and size to the scatter trace.

    Both values are written onto ``scatter.marker``; nothing is returned.
    """
    marker = scatter.marker
    marker.opacity = opacity
    marker.size = size

Use `ipywidgets.interactive` to generate control panel for function.

In [29]:
from ipywidgets import interactive
# One (min, max, step) range per set_opacity argument; each becomes a slider.
slider_ranges = dict(
    opacity=(0.0, 1.0, 0.01),
    size=(1, 10, 0.25),
)
opacity_slider = interactive(set_opacity, **slider_ranges)
opacity_slider

interactive(children=(FloatSlider(value=0.5, description='opacity', max=1.0, step=0.01), FloatSlider(value=5.0…

Adjust the width of the slider widgets

In [30]:
# Give the first two children of the control panel (the sliders) the same width.
for slider_pos in (0, 1):
    opacity_slider.children[slider_pos].layout.width = '400px'

Try zooming and then adjusting the marker params

### Looking at outliers

#### Tooltips
Use `'ID'` column as tooltip for scatter

In [31]:
# Attach each car's 'ID' value as the text of the corresponding point.
scatter.text = cars_df['ID']
# Use that text as the tooltip content.
scatter.hoverinfo = 'text'

#### All properties

Create an HTML widget to display the hover properties

In [32]:
from ipywidgets import HTML
details = HTML()
details

HTML(value='')

Register callback function to be executed on hover events. It will update the HTML widget using the pandas `to_html` method.

In [33]:
def hover_fn(trace, points, state):
    """Show all columns of the hovered car in the ``details`` HTML widget.

    Parameters
    ----------
    trace : the trace the event belongs to (unused here).
    points : hover event data; ``points.point_inds`` holds the row
        positions of the hovered points.
    state : additional event state passed by the caller (unused here).
    """
    # An event may carry no points for this trace; leave the widget unchanged
    # rather than failing on point_inds[0].
    if not points.point_inds:
        return
    ind = points.point_inds[0]
    details.value = cars_df.iloc[ind].to_frame().to_html()

# Register hover_fn so it is executed on hover events for the scatter trace.
scatter.on_hover(hover_fn)

#### Vehicle image

Create an `ipywidgets.Image` widget to display images

In [34]:
from ipywidgets import Image, Layout
# Image widget with a fixed 400x252 px size, initialised with the Camaro picture.
image_layout = Layout(height='252px', width='400px')
image_widget = Image(value=image_data['2012_Chevrolet_Camaro_Coupe'],
                     layout=image_layout)
image_widget

Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x08\x06\x0…

Update hover function to update the image widget along with the HTML widget

In [35]:
def hover_fn(trace, points, state):
    """Update the details table and the car image for the hovered point.

    Parameters
    ----------
    trace : the trace the event belongs to (unused here).
    points : hover event data; ``points.point_inds`` holds the row
        positions of the hovered points.
    state : additional event state passed by the caller (unused here).
    """
    # An event may carry no points for this trace; leave both widgets unchanged
    # rather than failing on point_inds[0].
    if not points.point_inds:
        return

    ind = points.point_inds[0]

    # Look the row up by position once, so the table and the image always
    # describe the same car regardless of the DataFrame's index labels.
    row = cars_df.iloc[ind]

    # Update details HTML widget
    details.value = row.to_frame().to_html()

    # Update image widget. Image keys use underscores where 'Model Year'
    # uses spaces.
    model_year = row['Model Year'].replace(' ', '_')
    image_bytes = image_data.get(model_year)
    if image_bytes is not None:
        # When no image was loaded for this model, keep the previous picture.
        image_widget.value = image_bytes

# Register the updated hover_fn (details table + image) on the scatter trace.
scatter.on_hover(hover_fn)

## Bringing it all together

Create simple dashboard using `HBox` and `VBox` containers

In [36]:
from ipywidgets import HBox, VBox
# Bottom row: the car picture next to its details table.
detail_row = HBox([image_widget, details])

# Stack the figure, the marker controls and the detail row vertically.
dashboard = VBox([fig, opacity_slider, detail_row])
dashboard

VBox(children=(FigureWidget({
    'data': [{'hoverinfo': 'text',
              'marker': {'opacity': 0.5, 'siz…

In [37]:
from plotly.offline import iplot, init_notebook_mode

In [38]:
init_notebook_mode()

In [39]:
iplot(fig)