{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
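    "# Artificial Intelligence Publications\n",
    "\n",
    "Country-level statistics on \"Artificial Intelligence\" publications from 1996 to 2014 (data source: http://www.scimagojr.com), shown as choropleth maps and a scatterplot."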
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sebastian Raschka 08/31/2015 \n",
      "\n",
      "CPython 3.4.3\n",
      "IPython 4.0.0\n",
      "\n",
      "plotly 1.8.3\n",
      "pandas 0.16.2\n"
     ]
    }
   ],
   "source": [
    "# Print the author, date, Python/IPython versions, and the plotly/pandas versions for this run\n",
    "%load_ext watermark\n",
    "%watermark -a 'Sebastian Raschka' -d -v -p plotly,pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preparing the Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Country</th>\n",
       "      <th>Code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AFG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>ALB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>DZA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>American Samoa</td>\n",
       "      <td>ASM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>AND</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Country Code\n",
       "0     Afghanistan  AFG\n",
       "1         Albania  ALB\n",
       "2         Algeria  DZA\n",
       "3  American Samoa  ASM\n",
       "4         Andorra  AND"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Lookup table: country name -> three-letter country code\n",
    "templ = (pd.read_csv('./data/template.csv')\n",
    "           .loc[:, ['COUNTRY', 'CODE']]\n",
    "           .rename(columns={'COUNTRY': 'Country', 'CODE': 'Code'}))\n",
    "templ.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Rank</th>\n",
       "      <th>Country</th>\n",
       "      <th>Documents</th>\n",
       "      <th>Citable documents</th>\n",
       "      <th>Citations</th>\n",
       "      <th>Self-Citations</th>\n",
       "      <th>Citations per Document</th>\n",
       "      <th>H index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>China</td>\n",
       "      <td>80685</td>\n",
       "      <td>0</td>\n",
       "      <td>286809</td>\n",
       "      <td>179188</td>\n",
       "      <td>11.22</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>United States</td>\n",
       "      <td>65565</td>\n",
       "      <td>0</td>\n",
       "      <td>1063805</td>\n",
       "      <td>339426</td>\n",
       "      <td>23.22</td>\n",
       "      <td>345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Japan</td>\n",
       "      <td>27049</td>\n",
       "      <td>0</td>\n",
       "      <td>139799</td>\n",
       "      <td>45035</td>\n",
       "      <td>8.44</td>\n",
       "      <td>116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>22460</td>\n",
       "      <td>0</td>\n",
       "      <td>266440</td>\n",
       "      <td>55920</td>\n",
       "      <td>18.00</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Germany</td>\n",
       "      <td>17364</td>\n",
       "      <td>0</td>\n",
       "      <td>146713</td>\n",
       "      <td>30620</td>\n",
       "      <td>14.99</td>\n",
       "      <td>142</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Rank         Country  Documents  Citable documents  Citations  \\\n",
       "0     1           China      80685                  0     286809   \n",
       "1     2   United States      65565                  0    1063805   \n",
       "2     3           Japan      27049                  0     139799   \n",
       "3     4  United Kingdom      22460                  0     266440   \n",
       "4     5         Germany      17364                  0     146713   \n",
       "\n",
       "   Self-Citations  Citations per Document  H index  \n",
       "0          179188                   11.22      150  \n",
       "1          339426                   23.22      345  \n",
       "2           45035                    8.44      116  \n",
       "3           55920                   18.00      174  \n",
       "4           30620                   14.99      142  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-country publication statistics (documents, citations, self-citations, ...) for the A.I. ranking\n",
    "rank_ai = pd.read_csv('./data/scimagojr_ai_countryrank.csv')\n",
    "rank_ai.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Country</th>\n",
       "      <th>Documents</th>\n",
       "      <th>Citations per Document</th>\n",
       "      <th>Clean Citation/Doc</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>China</td>\n",
       "      <td>80685</td>\n",
       "      <td>11.22</td>\n",
       "      <td>1.333841</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>United States</td>\n",
       "      <td>65565</td>\n",
       "      <td>23.22</td>\n",
       "      <td>11.048257</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Japan</td>\n",
       "      <td>27049</td>\n",
       "      <td>8.44</td>\n",
       "      <td>3.503420</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>22460</td>\n",
       "      <td>18.00</td>\n",
       "      <td>9.373108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Germany</td>\n",
       "      <td>17364</td>\n",
       "      <td>14.99</td>\n",
       "      <td>6.685844</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Country  Documents  Citations per Document  Clean Citation/Doc\n",
       "0           China      80685                   11.22            1.333841\n",
       "1   United States      65565                   23.22           11.048257\n",
       "2           Japan      27049                    8.44            3.503420\n",
       "3  United Kingdom      22460                   18.00            9.373108\n",
       "4         Germany      17364                   14.99            6.685844"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Citations per document with self-citations removed from the numerator\n",
    "clean_ratio = (rank_ai['Citations'] - rank_ai['Self-Citations']) / rank_ai['Documents']\n",
    "kept_columns = ['Country', 'Documents', 'Citations per Document', 'Clean Citation/Doc']\n",
    "\n",
    "# Append the new column, then keep only the columns used further down\n",
    "rank_ai = rank_ai.assign(**{'Clean Citation/Doc': clean_ratio})[kept_columns]\n",
    "rank_ai.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Country</th>\n",
       "      <th>Documents</th>\n",
       "      <th>Citations per Document</th>\n",
       "      <th>Clean Citation/Doc</th>\n",
       "      <th>Documents_all</th>\n",
       "      <th>AI/All Documents</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>China</td>\n",
       "      <td>80685</td>\n",
       "      <td>11.22</td>\n",
       "      <td>1.333841</td>\n",
       "      <td>3617355</td>\n",
       "      <td>2.230497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>United States</td>\n",
       "      <td>65565</td>\n",
       "      <td>23.22</td>\n",
       "      <td>11.048257</td>\n",
       "      <td>8626193</td>\n",
       "      <td>0.760069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Japan</td>\n",
       "      <td>27049</td>\n",
       "      <td>8.44</td>\n",
       "      <td>3.503420</td>\n",
       "      <td>2074872</td>\n",
       "      <td>1.303647</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>22460</td>\n",
       "      <td>18.00</td>\n",
       "      <td>9.373108</td>\n",
       "      <td>2397817</td>\n",
       "      <td>0.936685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Germany</td>\n",
       "      <td>17364</td>\n",
       "      <td>14.99</td>\n",
       "      <td>6.685844</td>\n",
       "      <td>2176860</td>\n",
       "      <td>0.797663</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Country  Documents  Citations per Document  Clean Citation/Doc  \\\n",
       "0           China      80685                   11.22            1.333841   \n",
       "1   United States      65565                   23.22           11.048257   \n",
       "2           Japan      27049                    8.44            3.503420   \n",
       "3  United Kingdom      22460                   18.00            9.373108   \n",
       "4         Germany      17364                   14.99            6.685844   \n",
       "\n",
       "   Documents_all  AI/All Documents  \n",
       "0        3617355          2.230497  \n",
       "1        8626193          0.760069  \n",
       "2        2074872          1.303647  \n",
       "3        2397817          0.936685  \n",
       "4        2176860          0.797663  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Document counts per country from the 'all' ranking file (presumably all subject areas - confirm)\n",
    "rank_all = (pd.read_csv('./data/scimagojr_all_countryrank.csv')\n",
    "              .loc[:, ['Country', 'Documents']]\n",
    "              .rename(columns={'Documents': 'Documents_all'}))\n",
    "\n",
    "# Inner join on the country name: countries missing from either table are dropped\n",
    "rank = pd.merge(rank_ai, rank_all, how='inner', on='Country')\n",
    "\n",
    "# A.I. documents as a percentage of all documents\n",
    "rank['AI/All Documents'] = (rank['Documents'] / rank['Documents_all']) * 100\n",
    "\n",
    "rank.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Attach the country codes (inner join on the country name) and save the merged table\n",
    "df = pd.merge(rank, templ, on='Country')\n",
    "df.to_csv('./data/citations.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Number of \"Artificial Intelligence\" Publications from 1996 to 2014"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plot.ly/~rasbt/634.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<plotly.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import plotly.plotly as py\n",
    "from plotly.graph_objs import Annotation, Annotations\n",
    "\n",
    "# Reload the merged table saved by the data-preparation step\n",
    "df = pd.read_csv('./data/citations.csv')\n",
    "\n",
    "# Colour stops for the map; the trace sets reversescale=True\n",
    "blue_scale = [[0, 'rgb(5, 10, 172)'],\n",
    "              [0.35, 'rgb(40, 60, 190)'],\n",
    "              [0.5, 'rgb(70, 100, 245)'],\n",
    "              [0.6, 'rgb(90, 120, 245)'],\n",
    "              [0.7, 'rgb(106, 137, 247)'],\n",
    "              [1, 'rgb(220, 220, 220)']]\n",
    "\n",
    "# One choropleth trace: countries keyed by code, coloured by document count\n",
    "trace = dict(\n",
    "    type='choropleth',\n",
    "    locations=df['Code'],\n",
    "    z=df['Documents'],\n",
    "    text=df['Country'],\n",
    "    colorscale=blue_scale,\n",
    "    autocolorscale=False,\n",
    "    reversescale=True,\n",
    "    marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),\n",
    "    tick0=0,\n",
    "    zmin=0,\n",
    "    dtick=1000,\n",
    "    colorbar=dict(\n",
    "        autotick=False,\n",
    "        tickprefix='',\n",
    "        title='Number of Publications'))\n",
    "\n",
    "# Source/author credit anchored to the bottom-left corner of the figure\n",
    "credit = Annotation(\n",
    "    text=('Data source: http://www.scimagojr.com<br>'\n",
    "          'Author: Sebastian Raschka<br>'\n",
    "          '(sebastianraschka.com, @rasbt)'),\n",
    "    xref='paper',\n",
    "    yref='paper',\n",
    "    align='left',\n",
    "    x=0,\n",
    "    y=0,\n",
    "    yanchor='bottom',\n",
    "    showarrow=False)\n",
    "\n",
    "data = [trace]\n",
    "layout = dict(\n",
    "    title='Number of \"Artificial Intelligence\" Publications from 1996 to 2014',\n",
    "    geo=dict(\n",
    "        showframe=False,\n",
    "        showcoastlines=False,\n",
    "        projection=dict(type='Mercator')),\n",
    "    annotations=Annotations([credit]))\n",
    "\n",
    "fig = dict(data=data, layout=layout)\n",
    "#py.image.save_as({'data': data}, './images/ai_publications_1.svg')\n",
    "py.iplot(fig, validate=False, filename='ai-publications-chloropleth-1')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Number of Citations/Publication (self-citations included)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plot.ly/~rasbt/636.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<plotly.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# World map of citations per document, self-citations included.\n",
    "# Uses df, py, Annotation and Annotations defined by the first map cell.\n",
    "data = [dict(\n",
    "        type='choropleth',\n",
    "        locations=df['Code'],\n",
    "        z=df['Citations per Document'],\n",
    "        text=df['Country'],\n",
    "        colorscale=[[0, 'rgb(5, 10, 172)'],\n",
    "                    [0.35, 'rgb(40, 60, 190)'],\n",
    "                    [0.5, 'rgb(70, 100, 245)'],\n",
    "                    [0.6, 'rgb(90, 120, 245)'],\n",
    "                    [0.7, 'rgb(106, 137, 247)'],\n",
    "                    [1, 'rgb(220, 220, 220)']],\n",
    "        autocolorscale=False,\n",
    "        reversescale=True,\n",
    "        marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),\n",
    "        # NOTE(review): tick0/dtick sit on the trace, not inside colorbar - confirm they have any effect\n",
    "        tick0=0,\n",
    "        zmin=0,\n",
    "        dtick=1000,\n",
    "        colorbar=dict(\n",
    "            autotick=False,\n",
    "            tickprefix='',\n",
    "            title='Number of Citations/Publication<br>(self-citations included)'))]\n",
    "\n",
    "layout = dict(\n",
    "    title=('Number of Citations per Publication in \"Artificial Intelligence\" Research from 1996 to 2014'\n",
    "           '<br>(self-citations included)'),\n",
    "    geo=dict(\n",
    "        showframe=False,\n",
    "        showcoastlines=False,\n",
    "        projection=dict(type='Mercator')),\n",
    "    annotations=Annotations([\n",
    "        Annotation(\n",
    "            text=('Data source: http://www.scimagojr.com<br>'\n",
    "                  'Author: Sebastian Raschka<br>'\n",
    "                  '(sebastianraschka.com, @rasbt)'),\n",
    "            # Both refs are 'paper' so (0, 0) is the figure's bottom-left corner,\n",
    "            # matching the first map instead of relying on the default xref\n",
    "            xref='paper',\n",
    "            yref='paper',\n",
    "            align='left',\n",
    "            x=0,\n",
    "            y=0,\n",
    "            yanchor='bottom',\n",
    "            showarrow=False)]))\n",
    "\n",
    "fig = dict(data=data, layout=layout)\n",
    "#py.image.save_as({'data': data}, './images/ai_publications_2.svg')\n",
    "py.iplot(fig, validate=False, filename='ai-publications-chloropleth-2')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Number of Citations/Publication (self-citations excluded)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plot.ly/~rasbt/638.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<plotly.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# World map of citations per document, self-citations excluded.\n",
    "# Uses df, py, Annotation and Annotations defined by the first map cell.\n",
    "data = [dict(\n",
    "        type='choropleth',\n",
    "        locations=df['Code'],\n",
    "        z=df['Clean Citation/Doc'],\n",
    "        text=df['Country'],\n",
    "        colorscale=[[0, 'rgb(5, 10, 172)'],\n",
    "                    [0.35, 'rgb(40, 60, 190)'],\n",
    "                    [0.5, 'rgb(70, 100, 245)'],\n",
    "                    [0.6, 'rgb(90, 120, 245)'],\n",
    "                    [0.7, 'rgb(106, 137, 247)'],\n",
    "                    [1, 'rgb(220, 220, 220)']],\n",
    "        autocolorscale=False,\n",
    "        reversescale=True,\n",
    "        marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),\n",
    "        # NOTE(review): tick0/dtick sit on the trace, not inside colorbar - confirm they have any effect\n",
    "        tick0=0,\n",
    "        zmin=0,\n",
    "        dtick=1000,\n",
    "        colorbar=dict(\n",
    "            autotick=False,\n",
    "            tickprefix='',\n",
    "            title='Number of Citations/Publication<br>(self-citations excluded)'))]\n",
    "\n",
    "layout = dict(\n",
    "    title=('Number of Citations per Publication in \"Artificial Intelligence\" Research from 1996 to 2014'\n",
    "           '<br>(self-citations excluded)'),\n",
    "    geo=dict(\n",
    "        showframe=False,\n",
    "        showcoastlines=False,\n",
    "        projection=dict(type='Mercator')),\n",
    "    annotations=Annotations([\n",
    "        Annotation(\n",
    "            text=('Data source: http://www.scimagojr.com<br>'\n",
    "                  'Author: Sebastian Raschka<br>'\n",
    "                  '(sebastianraschka.com, @rasbt)'),\n",
    "            # Both refs are 'paper' so (0, 0) is the figure's bottom-left corner,\n",
    "            # matching the first map instead of relying on the default xref\n",
    "            xref='paper',\n",
    "            yref='paper',\n",
    "            align='left',\n",
    "            x=0,\n",
    "            y=0,\n",
    "            yanchor='bottom',\n",
    "            showarrow=False)]))\n",
    "\n",
    "fig = dict(data=data, layout=layout)\n",
    "#py.image.save_as({'data': data}, './images/ai_publications_3.svg')\n",
    "py.iplot(fig, validate=False, filename='ai-publications-chloropleth-3')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## AI Publications per Total Publications"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plot.ly/~rasbt/656.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<plotly.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# World map of A.I. documents as a percentage of all documents per country.\n",
    "# Uses df, py, Annotation and Annotations defined by the first map cell.\n",
    "data = [dict(\n",
    "        type='choropleth',\n",
    "        locations=df['Code'],\n",
    "        z=df['AI/All Documents'],\n",
    "        text=df['Country'],\n",
    "        colorscale=[[0, 'rgb(5, 10, 172)'],\n",
    "                    [0.35, 'rgb(40, 60, 190)'],\n",
    "                    [0.5, 'rgb(70, 100, 245)'],\n",
    "                    [0.6, 'rgb(90, 120, 245)'],\n",
    "                    [0.7, 'rgb(106, 137, 247)'],\n",
    "                    [1, 'rgb(220, 220, 220)']],\n",
    "        autocolorscale=False,\n",
    "        reversescale=True,\n",
    "        marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),\n",
    "        # NOTE(review): tick0/dtick sit on the trace, not inside colorbar - confirm they have any effect\n",
    "        tick0=0,\n",
    "        zmin=0,\n",
    "        dtick=1000,\n",
    "        colorbar=dict(\n",
    "            autotick=False,\n",
    "            tickprefix='',\n",
    "            title='AI/all publications in percent'))]\n",
    "\n",
    "layout = dict(\n",
    "    title='Percentage of Publications in \"Artificial Intelligence\" Research from 1996 to 2014',\n",
    "    geo=dict(\n",
    "        showframe=False,\n",
    "        showcoastlines=False,\n",
    "        projection=dict(type='Mercator')),\n",
    "    annotations=Annotations([\n",
    "        Annotation(\n",
    "            text=('Data source: http://www.scimagojr.com<br>'\n",
    "                  'Author: Sebastian Raschka<br>'\n",
    "                  '(sebastianraschka.com, @rasbt)'),\n",
    "            # Both refs are 'paper' so (0, 0) is the figure's bottom-left corner,\n",
    "            # matching the first map instead of relying on the default xref\n",
    "            xref='paper',\n",
    "            yref='paper',\n",
    "            align='left',\n",
    "            x=0,\n",
    "            y=0,\n",
    "            yanchor='bottom',\n",
    "            showarrow=False)]))\n",
    "\n",
    "fig = dict(data=data, layout=layout)\n",
    "#py.image.save_as({'data': data}, './images/ai_publications_4.svg')\n",
    "py.iplot(fig, validate=False, filename='ai-publications-chloropleth-4')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Scatterplots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plot.ly/~rasbt/640.embed\" height=\"525px\" width=\"100%\"></iframe>"
      ],
      "text/plain": [
       "<plotly.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import plotly.plotly as py\n",
    "# Explicit names instead of a wildcard import, so it is clear where each class comes from\n",
    "from plotly.graph_objs import (Annotation, Annotations, Data, Figure, Font,\n",
    "                               Layout, Legend, Marker, Scatter, XAxis, YAxis)\n",
    "\n",
    "# Scatterplot of A.I. document count vs. citations per document (self-citations\n",
    "# excluded); only countries with at least 10k A.I. documents are plotted.\n",
    "# Uses df from the earlier cells.\n",
    "many_docs = df['Documents'] >= 10000\n",
    "\n",
    "trace0 = Scatter(\n",
    "    x=df.loc[many_docs, 'Documents'],\n",
    "    y=df.loc[many_docs, 'Clean Citation/Doc'],\n",
    "    mode='markers+text',\n",
    "    # Labels use the same row mask as x/y so each point gets its own country name\n",
    "    # regardless of how df happens to be sorted\n",
    "    text=df.loc[many_docs, 'Country'],\n",
    "    textposition='top center',\n",
    "    textfont=Font(family='Arial'),\n",
    "    marker=Marker(size=12),)\n",
    "\n",
    "\n",
    "data = Data([trace0])\n",
    "layout = Layout(\n",
    "    xaxis=XAxis(autorange=True, title='Number of A.I. documents published'),\n",
    "    # y is 'Clean Citation/Doc', i.e. a per-document ratio, not a raw citation count\n",
    "    yaxis=YAxis(autorange=True, title='Number of citations per document (self-citations excluded)'),\n",
    "    legend=Legend(\n",
    "        y=0.5,\n",
    "        yref='paper',\n",
    "        font=Font(size=18)),\n",
    "    annotations=Annotations([\n",
    "        # Source/author credit below the plot area\n",
    "        # NOTE(review): no xref is given here, so x=-0.5 is not in paper coordinates - confirm intended\n",
    "        Annotation(\n",
    "            text=('Data source: http://www.scimagojr.com<br>'\n",
    "                  'Author: Sebastian Raschka<br>'\n",
    "                  '(sebastianraschka.com, @rasbt)'),\n",
    "            yref='paper',\n",
    "            align='left',\n",
    "            x=-0.5,\n",
    "            y=-0.2,\n",
    "            yanchor='bottom',\n",
    "            showarrow=False,\n",
    "            font={'size': 7}),\n",
    "        # Footnote for the asterisk in the title, centred above the plot area\n",
    "        Annotation(\n",
    "            text='*Countries with < 10k publications excluded.',\n",
    "            yref='paper',\n",
    "            xref='paper',\n",
    "            align='center',\n",
    "            y=1.05,\n",
    "            x=0.5,\n",
    "            yanchor='bottom',\n",
    "            showarrow=False,\n",
    "            font={'size': 12})]),\n",
    "    title='A.I. Publications By Country* from 1996-2014',)\n",
    "\n",
    "fig = Figure(data=data, layout=layout)\n",
    "py.iplot(fig, filename='ai-publications-scatter-1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}