{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Artificial Intelligence Publications " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sebastian Raschka 08/31/2015 \n", "\n", "CPython 3.4.3\n", "IPython 4.0.0\n", "\n", "plotly 1.8.3\n", "pandas 0.16.2\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark -a 'Sebastian Raschka' -d -v -p plotly,pandas" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preparing the Data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryCode
0AfghanistanAFG
1AlbaniaALB
2AlgeriaDZA
3American SamoaASM
4AndorraAND
\n", "
" ], "text/plain": [ " Country Code\n", "0 Afghanistan AFG\n", "1 Albania ALB\n", "2 Algeria DZA\n", "3 American Samoa ASM\n", "4 Andorra AND" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "templ = pd.read_csv('./data/template.csv')\n", "templ = templ[['COUNTRY', 'CODE']]\n", "templ.columns = ['Country', 'Code']\n", "templ.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RankCountryDocumentsCitable documentsCitationsSelf-CitationsCitations per DocumentH index
01China80685028680917918811.22150
12United States655650106380533942623.22345
23Japan270490139799450358.44116
34United Kingdom2246002664405592018.00174
45Germany1736401467133062014.99142
\n", "
" ], "text/plain": [ " Rank Country Documents Citable documents Citations \\\n", "0 1 China 80685 0 286809 \n", "1 2 United States 65565 0 1063805 \n", "2 3 Japan 27049 0 139799 \n", "3 4 United Kingdom 22460 0 266440 \n", "4 5 Germany 17364 0 146713 \n", "\n", " Self-Citations Citations per Document H index \n", "0 179188 11.22 150 \n", "1 339426 23.22 345 \n", "2 45035 8.44 116 \n", "3 55920 18.00 174 \n", "4 30620 14.99 142 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rank_ai = pd.read_csv('./data/scimagojr_ai_countryrank.csv')\n", "rank_ai.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryDocumentsCitations per DocumentClean Citation/Doc
0China8068511.221.333841
1United States6556523.2211.048257
2Japan270498.443.503420
3United Kingdom2246018.009.373108
4Germany1736414.996.685844
\n", "
" ], "text/plain": [ " Country Documents Citations per Document Clean Citation/Doc\n", "0 China 80685 11.22 1.333841\n", "1 United States 65565 23.22 11.048257\n", "2 Japan 27049 8.44 3.503420\n", "3 United Kingdom 22460 18.00 9.373108\n", "4 Germany 17364 14.99 6.685844" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rank_ai['Clean Citation/Doc'] = (rank_ai['Citations'] - rank_ai['Self-Citations']) / rank_ai['Documents']\n", "rank_ai=rank_ai[['Country', \n", " 'Documents', \n", " 'Citations per Document', \n", " 'Clean Citation/Doc']]\n", "rank_ai.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryDocumentsCitations per DocumentClean Citation/DocDocuments_allAI/All Documents
0China8068511.221.33384136173552.230497
1United States6556523.2211.04825786261930.760069
2Japan270498.443.50342020748721.303647
3United Kingdom2246018.009.37310823978170.936685
4Germany1736414.996.68584421768600.797663
\n", "
" ], "text/plain": [ " Country Documents Citations per Document Clean Citation/Doc \\\n", "0 China 80685 11.22 1.333841 \n", "1 United States 65565 23.22 11.048257 \n", "2 Japan 27049 8.44 3.503420 \n", "3 United Kingdom 22460 18.00 9.373108 \n", "4 Germany 17364 14.99 6.685844 \n", "\n", " Documents_all AI/All Documents \n", "0 3617355 2.230497 \n", "1 8626193 0.760069 \n", "2 2074872 1.303647 \n", "3 2397817 0.936685 \n", "4 2176860 0.797663 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rank_all = pd.read_csv('./data/scimagojr_all_countryrank.csv')\n", "rank_all=rank_all[['Country', 'Documents']]\n", "rank_all.columns = ['Country', 'Documents_all']\n", "\n", "rank = rank_ai.merge(rank_all, on='Country')\n", "rank['AI/All Documents'] = rank['Documents'] / rank['Documents_all'] * 100\n", "\n", "rank.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = rank.merge(templ, on='Country')\n", "df.to_csv('./data/citations.csv', index=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Number of \"Artificial Intelligence\" Publications from 1996 to 2014" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly.plotly as py\n", "from plotly.graph_objs import Annotation, Annotations\n", "import pandas as pd\n", "\n", "df = pd.read_csv('./data/citations.csv')\n", "\n", "data=[dict(\n", " type='choropleth',\n", " locations=df['Code'],\n", " z = df['Documents'],\n", " text = df['Country'],\n", " colorscale=[[0,\"rgb(5, 10, 172)\"],\n", " [0.35,\"rgb(40, 60, 190)\"],\n", " [0.5,\"rgb(70, 100, 245)\"],\n", " [0.6,\"rgb(90, 120, 245)\"],\n", " [0.7,\"rgb(106, 137, 247)\"],\n", " [1,\"rgb(220, 220, 220)\"]],\n", " autocolorscale=False,\n", " 
reversescale=True,\n", " marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),\n", " tick0=0,\n", " zmin=0,\n", " dtick=1000,\n", " colorbar=dict(\n", " autotick=False,\n", " tickprefix='',\n", " title='Number of Publications'))]\n", "\n", "layout=dict(\n", " title='Number of \"Artificial Intelligence\" Publications from 1996 to 2014',\n", " geo=dict(\n", " showframe=False,\n", " showcoastlines=False,\n", " projection=dict(type='Mercator')),\n", " annotations=Annotations([\n", " Annotation(\n", " text='Data source: http://www.scimagojr.com
'\\\n", " 'Author: Sebastian Raschka
'\\\n", " '(sebastianraschka.com, @rasbt)',\n", " xref='paper',\n", " yref='paper',\n", " align='left',\n", " x=0,\n", " y=0,\n", " yanchor='bottom',\n", " showarrow=False)]))\n", "\n", "fig = dict(data=data, layout=layout)\n", "#py.image.save_as({'data': data}, './images/ai_publications_1.svg')\n", "py.iplot(fig, validate=False, filename='ai-publications-chloropleth-1')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Number of Citations/Publication (self-citations included)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = [dict(\n", " type='choropleth',\n", " locations=df['Code'],\n", " z=df['Citations per Document'],\n", " text=df['Country'],\n", " colorscale=[[0,\"rgb(5, 10, 172)\"],\n", " [0.35,\"rgb(40, 60, 190)\"],\n", " [0.5,\"rgb(70, 100, 245)\"],\n", " [0.6,\"rgb(90, 120, 245)\"],\n", " [0.7,\"rgb(106, 137, 247)\"],\n", " [1,\"rgb(220, 220, 220)\"]],\n", " autocolorscale=False,\n", " reversescale=True,\n", " marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),\n", " tick0=0,\n", " zmin=0,\n", " dtick=1000,\n", " colorbar=dict(\n", " autotick=False,\n", " tickprefix='',\n", " title='Number of Citations/Publication
(self-citations included)'))]\n", "\n", "layout=dict(\n", " title='Number of Citations per Publication in \"Artificial Intelligence\" Research from 1996 to 2014'\\\n", " '
(self-citations included)',\n", " geo=dict(\n", " showframe=False,\n", " showcoastlines=False,\n", " projection=dict(type='Mercator')),\n", " annotations=Annotations([\n", " Annotation(\n", " text='Data source: http://www.scimagojr.com
'\\\n", " 'Author: Sebastian Raschka
'\\\n", " '(sebastianraschka.com, @rasbt)',\n", " yref='paper',\n", " align='left',\n", " x=0,\n", " y=0,\n", " yanchor='bottom',\n", " showarrow=False)]))\n", "\n", "fig = dict(data=data, layout=layout)\n", "#py.image.save_as({'data': data}, './images/ai_publications_2.svg')\n", "py.iplot(fig, validate=False, filename='ai-publications-chloropleth-2')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Number of Citations/Publication
(self-citations excluded)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = [dict(\n", " type='choropleth',\n", " locations=df['Code'],\n", " z=df['Clean Citation/Doc'],\n", " text=df['Country'],\n", " colorscale=[[0,\"rgb(5, 10, 172)\"],\n", " [0.35,\"rgb(40, 60, 190)\"],\n", " [0.5,\"rgb(70, 100, 245)\"],\n", " [0.6,\"rgb(90, 120, 245)\"],\n", " [0.7,\"rgb(106, 137, 247)\"],\n", " [1,\"rgb(220, 220, 220)\"]],\n", " autocolorscale=False,\n", " reversescale=True,\n", " marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),\n", " tick0=0,\n", " zmin=0,\n", " dtick=1000,\n", " colorbar=dict(\n", " autotick=False,\n", " tickprefix='',\n", " title='Number of Citations/Publication
(self-citations excluded)'))]\n", "\n", "layout=dict(\n", " title='Number of Citations per Publication in \"Artificial Intelligence\" Research from 1996 to 2014'\\\n", " '
(self-citations excluded)',\n", " geo=dict(\n", " showframe=False,\n", " showcoastlines=False,\n", " projection=dict(type='Mercator')),\n", " annotations=Annotations([\n", " Annotation(\n", " text='Data source: http://www.scimagojr.com
'\\\n", " 'Author: Sebastian Raschka
'\\\n", " '(sebastianraschka.com, @rasbt)',\n", " yref='paper',\n", " align='left',\n", " x=0,\n", " y=0,\n", " yanchor='bottom',\n", " showarrow=False)]))\n", "\n", "fig = dict(data=data, layout=layout)\n", "#py.image.save_as({'data': data}, './images/ai_publications_3.svg')\n", "py.iplot(fig, validate=False, filename='ai-publications-chloropleth-3')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## AI Publications per Total Publications" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = [dict(\n", " type='choropleth',\n", " locations=df['Code'],\n", " z=df['AI/All Documents'],\n", " text=df['Country'],\n", " colorscale=[[0,\"rgb(5, 10, 172)\"],\n", " [0.35,\"rgb(40, 60, 190)\"],\n", " [0.5,\"rgb(70, 100, 245)\"],\n", " [0.6,\"rgb(90, 120, 245)\"],\n", " [0.7,\"rgb(106, 137, 247)\"],\n", " [1,\"rgb(220, 220, 220)\"]],\n", " autocolorscale=False,\n", " reversescale=True,\n", " marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),\n", " tick0=0,\n", " zmin=0,\n", " dtick=1000,\n", " colorbar=dict(\n", " autotick=False,\n", " tickprefix='',\n", " title='AI/all publications in percent'))]\n", "\n", "layout=dict(\n", " title='Percentage of Publications in \"Artificial Intelligence\" Research from 1996 to 2014',\n", " geo=dict(\n", " showframe=False,\n", " showcoastlines=False,\n", " projection=dict(type='Mercator')),\n", " annotations=Annotations([\n", " Annotation(\n", " text='Data source: http://www.scimagojr.com
'\\\n", " 'Author: Sebastian Raschka
'\\\n", " '(sebastianraschka.com, @rasbt)',\n", " yref='paper',\n", " align='left',\n", " x=0,\n", " y=0,\n", " yanchor='bottom',\n", " showarrow=False)]))\n", "\n", "fig = dict(data=data, layout=layout)\n", "#py.image.save_as({'data': data}, './images/ai_publications_4.svg')\n", "py.iplot(fig, validate=False, filename='ai-publications-chloropleth-4') " ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "### Scatterplots" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import plotly.plotly as py\n", "from plotly.graph_objs import (Annotation, Annotations, Data, Figure, Font,\n", " Layout, Legend, Marker, Scatter, XAxis, YAxis)\n", "\n", "# Build the row filter once so x, y and the text labels are all taken from\n", "# the same rows; an unfiltered label column only lines up with the points\n", "# when the qualifying rows happen to come first in df.\n", "is_plotted = df['Documents']>=10000\n", "\n", "trace0 = Scatter(\n", " x=df.loc[is_plotted, 'Documents'],\n", " y=df.loc[is_plotted, 'Clean Citation/Doc'],\n", " mode='markers+text',\n", " text=df.loc[is_plotted, 'Country'],\n", " textposition='top center',\n", " textfont=Font(family='Arial'),\n", " marker=Marker(size=12),)\n", "\n", "\n", "data = Data([trace0])\n", "layout = Layout(\n", " xaxis=XAxis(autorange=True, title='Number of A.I. documents published'),\n", " yaxis=YAxis(autorange=True, title='Number of citations per publication (self-citations excluded)'),\n", " legend=Legend(\n", " y=0.5,\n", " yref='paper',\n", " font=Font(size=18)),\n", " annotations=Annotations([\n", " Annotation(\n", " text='Data source: http://www.scimagojr.com
'\\\n", " 'Author: Sebastian Raschka
'\\\n", " '(sebastianraschka.com, @rasbt)',\n", " yref='paper',\n", " align='left',\n", " x=-0.5,\n", " y=-0.2,\n", " yanchor='bottom',\n", " showarrow=False,\n", " font={'size': 7}),\n", " \n", " Annotation(\n", " text='*Countries with < 10k publications excluded.',\n", " yref='paper',\n", " xref='paper',\n", " align='center',\n", " y=1.05,\n", " x=0.5,\n", " yanchor='bottom',\n", " showarrow=False,\n", " font={'size': 12})]),\n", " title='A.I. Publications By Country* from 1996-2014',)\n", "\n", "fig = Figure(data=data, layout=layout)\n", "py.iplot(fig, filename='ai-publications-scatter-1')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3" } }, "nbformat": 4, "nbformat_minor": 0 }