This notebook creates a dashboard for evaluating the openness of scientific literature. 
It was submitted to the John Hunter Excellence in Plotting Contest 2020.

Content under CC-BY-NC-SA 4.0 license 
Code under GNU-GPL v3.0 license 
© 2020 Serena Bonaretti

--- 

Create jupyter-flex parameters: 
(i.e. the following cells are tagged as "parameters")

In [None]:
# title of the dashboard, passed to jupyter-flex as a parameter (this cell is tagged as "parameters")
flex_title = "Open Data, Open Software, and Open Access Publications in Knee Cartilage Segmentation Literature"

In [None]:
# URL of this notebook's source code, added as a link in the top bar of the dashboard
# (jupyter-flex parameter: this cell is tagged as "parameters")
flex_source_code = "https://github.com/sbonaretti/Hunter_viz_2020/blob/master/open_literature_flex.ipynb"

In [None]:
# layout orientation of the dashboard (jupyter-flex parameter: this cell is tagged as "parameters");
# the dashboard sections further down are titled "Row 1" and "Row 2"
flex_orientation = "rows"

--- 

Imports:

In [4]:
import os

import wget # to download from zenodo
import pandas as pd 
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

import ipywidgets as widgets
from ipywidgets import *
from IPython.display import display

--- 

Load the data:

In [5]:
# file name and zenodo url
file_name = "cart_segm_literature_viz.csv"
# the record number in the URL corresponds to the last digits of the DOI of this specific dataset version
zenodo_url = "https://zenodo.org/record/3872040/files/"
data_path = "./" + file_name

# download only when there is no local copy yet: wget does not overwrite an existing
# file but saves the new download under a numbered name, so downloading at every run
# would pile up duplicates while pd.read_csv keeps reading the first copy
if not os.path.exists(data_path):
    wget.download(zenodo_url + file_name, data_path)  # input, output

# load literature table
literature = pd.read_csv(data_path)

# turn each id into a readable label: replace underscore with space and opening bracket,
# then add the closing bracket
literature["bibtex_id"] = (
    literature["bibtex_id"].str.replace("_", " (", regex=False).astype(str) + ")"
)

# adding little randomness to latitude and longitude to avoid dot overlaps
np.random.seed(seed=3)  # if this is not present, the cell is not reproducible
jitter_max = 2.5        # upper bound of the random offset, in the units of the coordinate columns
n_papers = len(literature)

# the offsets are added as plain arrays (element by element), so the result does not
# depend on the row labels of the table; latitude is drawn before longitude because
# the order of the draws determines the values obtained from the seed

# add randomness to latitude
random_lat = np.random.uniform(low=0.0, high=jitter_max, size=n_papers)
literature["latitude_random"] = literature["latitude"] + random_lat

# add randomness to longitude
random_lon = np.random.uniform(low=0.0, high=jitter_max, size=n_papers)
literature["longitude_random"] = literature["longitude"] + random_lon

--- 
---

The markdown cells below that contain titles are used by jupyter-flex to create the sections of the dashboard.  
Cells containing comments start with ->


## Row 1 of dashboard

---
-> Create the sidebar: 

### Select one or more criteria 

In [6]:
# build the ipywidgets shown in the left sidebar

def _criterion_checkbox(label):
    """Return an unticked, enabled checkbox without indentation, captioned with `label`."""
    return widgets.Checkbox(value=False, description=label, disabled=False, indent=False)


# one checkbox per openness criterion
open_original_data = _criterion_checkbox('Open original data')
open_derived_data = _criterion_checkbox('Open derived data')
open_software = _criterion_checkbox('Open-source software')
open_access = _criterion_checkbox('Open access paper')

# stack the checkboxes vertically; as last expression of the cell, the box is displayed
VBox([open_original_data, open_derived_data, open_software, open_access])

VBox(children=(Checkbox(value=False, description='Open original data', indent=False), Checkbox(value=False, de…

---
-> Create the literature map:

### Literature Map

In [7]:
# declare the output widget where the map is going to be displayed
output_map = widgets.Output()

In [8]:
# create the map using plotly express: one dot per paper, placed at its jittered
# coordinates, with the paper id shown on hover
# (px.scatter_geo returns a new figure, so no figure needs to be created beforehand)
fig = px.scatter_geo(
    literature,
    lat="latitude_random",
    lon="longitude_random",
    projection="equirectangular",
    hover_name="bibtex_id",
)

# all dots start black with the same size
fig.update_traces(marker=dict(color="black", size=8))

# small margins around the map; the result is assigned so that the figure is not
# rendered as the output of this cell
margin = go.layout.Margin(l=20, r=20, b=20, t=20)
fig = fig.update_layout(margin=margin)

In [9]:
# openness criteria of the dashboard, one entry per sidebar checkbox, in display order:
#   checkbox    -> the widget that switches the criterion on
#   column      -> column of `literature` holding the link ("not_available" when missing)
#   label       -> wording used when several criteria are combined
#   label_alone -> wording used when the criterion is the only one selected
_CRITERIA = [
    {"checkbox": open_original_data, "column": "link_to_open_original_data",
     "label": "open original data", "label_alone": "open original data"},
    {"checkbox": open_derived_data, "column": "link_to_open_derived_data",
     "label": "open derived data", "label_alone": "open derived data"},
    {"checkbox": open_software, "column": "link_to_open_source_code",
     "label": "open-source code", "label_alone": "open-source software"},
    {"checkbox": open_access, "column": "link_to_open_access",
     "label": "open access publication", "label_alone": "open access publication"},
]

# columns of `literature` that are not shown in the output table
_HIDDEN_COLUMNS = ["algorithm_type", "bibtex_id", "latitude", "longitude",
                   "latitude_random", "longitude_random"]

# headers used in the output table
_TABLE_HEADERS = {"author_1": "First Author", "country_last_author": "Country", "title": "Title",
                  "year": "Year", "link_to_open_access": "Open Access Publication",
                  "link_to_open_original_data": "Original Data",
                  "link_to_open_derived_data": "Derived Data",
                  "link_to_open_source_code": "Open Source Code"}


def _join_labels(labels):
    """Join labels as an English enumeration: "a", "a and b", "a, b, and c"."""
    if len(labels) <= 2:
        return " and ".join(labels)
    return ", ".join(labels[:-1]) + ", and " + labels[-1]


def _select_papers():
    """Return the papers matching all ticked checkboxes and the text describing them.

    Returns
    -------
    (current_data, end_result_text)
        current_data: rows of `literature` whose link differs from "not_available"
        in every selected column; end_result_text: the selected criteria in words.
        Both are None when no checkbox is ticked.
    """
    ticked = [criterion for criterion in _CRITERIA if criterion["checkbox"].value]
    if not ticked:
        return None, None

    # a paper matches when it satisfies every selected criterion
    matches = pd.Series(True, index=literature.index)
    for criterion in ticked:
        matches &= literature[criterion["column"]] != "not_available"

    if len(ticked) == 1:
        end_result_text = ticked[0]["label_alone"]
    else:
        end_result_text = _join_labels([criterion["label"] for criterion in ticked])

    return literature[matches], end_result_text


def on_value_change(change):
    """Redraw the map and the table of selected papers after a checkbox changed.

    Registered below as observer of the "value" of the four sidebar checkboxes.
    The map is redrawn in `output_map` with the matching papers in red and all
    the others in black. The sentence in `report` and the table in `output_table`
    are updated accordingly; both are emptied when no checkbox is ticked.
    `report` and `output_table` are created in a later cell of the notebook, so
    that cell must have run before the first checkbox is clicked.

    Parameters
    ----------
    change : the change notification sent by the observed widget; it is not used,
        because the current state is read from the checkboxes directly
    """

    output_map.clear_output()

    # update the map
    with output_map:

        # select rows that match the criteria
        current_data, end_result_text = _select_papers()
        any_ticked = current_data is not None

        # one color per paper, labelled like the rows of the table: black by default ...
        color_series = pd.Series("black", index=literature.index)

        # ... and red for the papers satisfying the selection (when at least a checkbox is ticked)
        if any_ticked:
            color_series.loc[current_data.index] = "red"

        # update the colors in the figure
        fig.data[0]["marker"]["color"] = color_series

        display(fig)

    # update the table output

    output_table.clear_output()

    with output_table:
        if any_ticked:

            # print out the text ("has" for zero or one paper, "have" otherwise)
            n_selected = current_data.shape[0]
            verb = " has " if n_selected <= 1 else " have "
            report.value = ("Out of " + str(literature.shape[0]) + " papers, "
                            + str(n_selected) + verb + end_result_text)

            # keep only the columns of interest and give them readable headers
            table = current_data.drop(columns=_HIDDEN_COLUMNS).rename(columns=_TABLE_HEADERS)

            # print out the table
            display(table)
        else:
            report.value = " "


# observe functions for the widgets
# (note: running this cell again registers one more callback without removing the previous one)
open_original_data.observe(on_value_change, names="value")
open_derived_data.observe(on_value_change, names="value")
open_software.observe(on_value_change, names="value")
open_access.observe(on_value_change, names="value")

In [10]:
# draw the initial map (all dots black): otherwise the map panel stays empty until the
# first checkbox is clicked, because only the checkbox callback writes into output_map;
# the widget is cleared first, so that running this cell again does not stack maps
output_map.clear_output()
with output_map:
    display(fig)

# show output map
output_map

Output()

---
---

## Row 2 of dashboard

---
-> Write the outputs of the selections:

### Selected papers

In [11]:
# widgets of the "Selected papers" section

# sentence stating how many papers match the selected criteria (a single space until a selection is made)
report = widgets.Label(value=" ")

# area where the table of the selected papers is displayed
output_table = widgets.Output()

In [12]:
# display the sentence above the table by stacking the two widgets vertically
widgets.VBox(children=[report, output_table])

VBox(children=(Label(value=' '), Output()))

---
-> Footer 

Content under CC-BY-NC-SA 4.0 license 
Code under GNU-GPL v3.0 license 
© 2020 Serena Bonaretti

---
-> Dependencies for reproducibility of this notebook

In [13]:
# load the watermark IPython extension, which provides the %watermark magic
%load_ext watermark

# print python, ipython, packages, and machine characteristics
# NOTE(review): ipywidgets is imported by this notebook but is not in the package list - consider adding it
%watermark -v -m -p wget,pandas,numpy,plotly,jupyter_flex,voila,watermark 

CPython 3.7.6
IPython 7.13.0

wget 3.2
pandas 1.0.3
numpy 1.18.1
plotly 4.8.0
jupyter_flex 0.5.0
voila 0.1.21
watermark 2.0.2

compiler : Clang 4.0.1 (tags/RELEASE_401/final)
system : Darwin
release : 19.4.0
machine : x86_64
processor : i386
CPU cores : 4
interpreter: 64bit
