{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "URL: http://matplotlib.org/examples/showcase/bachelors_degrees_by_gender.html\n",
    "\n",
    "Most examples work equivalently across multiple plotting backends; this example is also available for:\n",
    "\n",
    "* [Matplotlib - bachelors_degrees_by_gender](../matplotlib/bachelors_degrees_by_gender.ipynb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import holoviews as hv\n",
    "import pandas as pd\n",
    "from holoviews import opts\n",
    "\n",
    "hv.extension('bokeh', 'matplotlib')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gender_degree_data = pd.read_csv(\"https://datasets.holoviz.org/bachelor_women/v1/percent_bachelors_degrees_women_usa.csv\")\n",
    "\n",
    "title = ('Percentage of Bachelor\\'s degrees conferred to women in '\n",
    "         'the U.S.A. by major (1970-2011)\\n')\n",
    "\n",
    "# These are the colors that will be used in the plot\n",
    "color_sequence = ['#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c',\n",
    "                  '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5',\n",
    "                  '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f',\n",
    "                  '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5']\n",
    "\n",
    "# Offsets for degree labels. The keys must equal the 'Degree' values built below,\n",
    "# and str.title() capitalizes every word, so 'and' has to be written as 'And'.\n",
    "y_offsets = {'Foreign Languages': 0.5, 'English': -0.5,\n",
    "             'Communications And Journalism': 0.75,\n",
    "             'Art And Performance': -0.25, 'Agriculture': 1.25,\n",
    "             'Social Sciences And History': 0.25, 'Business': -0.75,\n",
    "             'Math And Statistics': 0.75, 'Architecture': -0.75,\n",
    "             'Computer Science': 0.75, 'Engineering': -0.25}\n",
    "\n",
    "# Use pd.melt to unpivot the per-degree columns into (Year, Degree, conferred) rows.\n",
    "# read_csv already returned a DataFrame, so it is passed to melt directly.\n",
    "df = pd.melt(gender_degree_data, id_vars='Year', var_name='Degree', value_name='conferred')\n",
    "df['Degree'] = [d.replace('_', ' ').title() for d in df.Degree]\n",
"\n",
    "# Define a formatter that works for both bokeh and matplotlib\n",
    "def percent_format(x):\n",
    "    \"\"\"Format a tick value as a whole-number percentage string, e.g. 42.0 -> '42%'.\"\"\"\n",
    "    try:\n",
    "        return '{:0.0f}%'.format(x)\n",
    "    except Exception:\n",
    "        # Fall back to printf-style formatting when str.format rejects the value.\n",
    "        # The literal percent sign must be doubled: a lone trailing '%' makes the\n",
    "        # format string incomplete and raises ValueError.\n",
    "        return '%d%%' % x\n",
    "\n",
    "# Define the value dimensions\n",
    "vdim = hv.Dimension('conferred', range=(0, 90))\n",
    "\n",
    "# Define the dataset\n",
    "ds = hv.Dataset(df, vdims=vdim)\n",
    "curves = ds.to(hv.Curve, 'Year', groupby='Degree').overlay().redim(Year=dict(range=(1970, 2030)))\n",
    "\n",
    "# Define a function to get the text annotations\n",
    "max_year = ds['Year'].max()\n",
    "def offset(row):\n",
    "    \"\"\"Return a copy of ``row`` whose 'conferred' value is shifted by the y_offsets entry for its Degree (0 if absent).\"\"\"\n",
    "    # Work on a copy: DataFrame.apply does not support functions that mutate the row they are given\n",
    "    shifted = row.copy()\n",
    "    shifted['conferred'] += y_offsets.get(row.Degree, 0)\n",
    "    return shifted\n",
    "label_df = df[df.Year==max_year].apply(offset, axis=1)\n",
    "labels = hv.Labels(label_df, ['Year', 'conferred'], 'Degree')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "overlay = (curves * labels).relabel(title)\n",
    "\n",
    "gridstyle = {'grid_line_dash': [6, 4], 'grid_line_width': 3, 'grid_bounds': (0, 100)}\n",
    "\n",
    "overlay.opts(\n",
    "    opts.Curve(show_frame=False, labelled=[], tools=['hover'],\n",
    "               height=900, width=900, show_legend=False, xticks=[1970, 1980, 1990, 2000, 2010],\n",
    "               color=hv.Cycle(values=color_sequence), line_width=2, show_grid=True, yformatter='%d%%'),\n",
    "    opts.Labels(text_color='Degree', cmap=color_sequence, text_align='left'),\n",
    "    opts.NdOverlay(batched=False, gridstyle=gridstyle))"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}