{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Artificial Intelligence Publications "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sebastian Raschka 08/31/2015 \n",
"\n",
"CPython 3.4.3\n",
"IPython 4.0.0\n",
"\n",
"plotly 1.8.3\n",
"pandas 0.16.2\n"
]
}
],
"source": [
"%load_ext watermark\n",
"%watermark -a 'Sebastian Raschka' -d -v -p plotly,pandas"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Preparing the Data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" Country | \n",
" Code | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Afghanistan | \n",
" AFG | \n",
"
\n",
" \n",
" 1 | \n",
" Albania | \n",
" ALB | \n",
"
\n",
" \n",
" 2 | \n",
" Algeria | \n",
" DZA | \n",
"
\n",
" \n",
" 3 | \n",
" American Samoa | \n",
" ASM | \n",
"
\n",
" \n",
" 4 | \n",
" Andorra | \n",
" AND | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Country Code\n",
"0 Afghanistan AFG\n",
"1 Albania ALB\n",
"2 Algeria DZA\n",
"3 American Samoa ASM\n",
"4 Andorra AND"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"templ = pd.read_csv('./data/template.csv')\n",
"templ = templ[['COUNTRY', 'CODE']]\n",
"templ.columns = ['Country', 'Code']\n",
"templ.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Rank | \n",
" Country | \n",
" Documents | \n",
" Citable documents | \n",
" Citations | \n",
" Self-Citations | \n",
" Citations per Document | \n",
" H index | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" China | \n",
" 80685 | \n",
" 0 | \n",
" 286809 | \n",
" 179188 | \n",
" 11.22 | \n",
" 150 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" United States | \n",
" 65565 | \n",
" 0 | \n",
" 1063805 | \n",
" 339426 | \n",
" 23.22 | \n",
" 345 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Japan | \n",
" 27049 | \n",
" 0 | \n",
" 139799 | \n",
" 45035 | \n",
" 8.44 | \n",
" 116 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" United Kingdom | \n",
" 22460 | \n",
" 0 | \n",
" 266440 | \n",
" 55920 | \n",
" 18.00 | \n",
" 174 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" Germany | \n",
" 17364 | \n",
" 0 | \n",
" 146713 | \n",
" 30620 | \n",
" 14.99 | \n",
" 142 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Rank Country Documents Citable documents Citations \\\n",
"0 1 China 80685 0 286809 \n",
"1 2 United States 65565 0 1063805 \n",
"2 3 Japan 27049 0 139799 \n",
"3 4 United Kingdom 22460 0 266440 \n",
"4 5 Germany 17364 0 146713 \n",
"\n",
" Self-Citations Citations per Document H index \n",
"0 179188 11.22 150 \n",
"1 339426 23.22 345 \n",
"2 45035 8.44 116 \n",
"3 55920 18.00 174 \n",
"4 30620 14.99 142 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rank_ai = pd.read_csv('./data/scimagojr_ai_countryrank.csv')\n",
"rank_ai.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Country | \n",
" Documents | \n",
" Citations per Document | \n",
" Clean Citation/Doc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" China | \n",
" 80685 | \n",
" 11.22 | \n",
" 1.333841 | \n",
"
\n",
" \n",
" 1 | \n",
" United States | \n",
" 65565 | \n",
" 23.22 | \n",
" 11.048257 | \n",
"
\n",
" \n",
" 2 | \n",
" Japan | \n",
" 27049 | \n",
" 8.44 | \n",
" 3.503420 | \n",
"
\n",
" \n",
" 3 | \n",
" United Kingdom | \n",
" 22460 | \n",
" 18.00 | \n",
" 9.373108 | \n",
"
\n",
" \n",
" 4 | \n",
" Germany | \n",
" 17364 | \n",
" 14.99 | \n",
" 6.685844 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Country Documents Citations per Document Clean Citation/Doc\n",
"0 China 80685 11.22 1.333841\n",
"1 United States 65565 23.22 11.048257\n",
"2 Japan 27049 8.44 3.503420\n",
"3 United Kingdom 22460 18.00 9.373108\n",
"4 Germany 17364 14.99 6.685844"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rank_ai['Clean Citation/Doc'] = (rank_ai['Citations'] - rank_ai['Self-Citations']) / rank_ai['Documents']\n",
"rank_ai=rank_ai[['Country', \n",
" 'Documents', \n",
" 'Citations per Document', \n",
" 'Clean Citation/Doc']]\n",
"rank_ai.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Country | \n",
" Documents | \n",
" Citations per Document | \n",
" Clean Citation/Doc | \n",
" Documents_all | \n",
" AI/All Documents | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" China | \n",
" 80685 | \n",
" 11.22 | \n",
" 1.333841 | \n",
" 3617355 | \n",
" 2.230497 | \n",
"
\n",
" \n",
" 1 | \n",
" United States | \n",
" 65565 | \n",
" 23.22 | \n",
" 11.048257 | \n",
" 8626193 | \n",
" 0.760069 | \n",
"
\n",
" \n",
" 2 | \n",
" Japan | \n",
" 27049 | \n",
" 8.44 | \n",
" 3.503420 | \n",
" 2074872 | \n",
" 1.303647 | \n",
"
\n",
" \n",
" 3 | \n",
" United Kingdom | \n",
" 22460 | \n",
" 18.00 | \n",
" 9.373108 | \n",
" 2397817 | \n",
" 0.936685 | \n",
"
\n",
" \n",
" 4 | \n",
" Germany | \n",
" 17364 | \n",
" 14.99 | \n",
" 6.685844 | \n",
" 2176860 | \n",
" 0.797663 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Country Documents Citations per Document Clean Citation/Doc \\\n",
"0 China 80685 11.22 1.333841 \n",
"1 United States 65565 23.22 11.048257 \n",
"2 Japan 27049 8.44 3.503420 \n",
"3 United Kingdom 22460 18.00 9.373108 \n",
"4 Germany 17364 14.99 6.685844 \n",
"\n",
" Documents_all AI/All Documents \n",
"0 3617355 2.230497 \n",
"1 8626193 0.760069 \n",
"2 2074872 1.303647 \n",
"3 2397817 0.936685 \n",
"4 2176860 0.797663 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rank_all = pd.read_csv('./data/scimagojr_all_countryrank.csv')\n",
"rank_all=rank_all[['Country', 'Documents']]\n",
"rank_all.columns = ['Country', 'Documents_all']\n",
"\n",
"rank = rank_ai.merge(rank_all, on='Country')\n",
"rank['AI/All Documents'] = rank['Documents'] / rank['Documents_all'] * 100\n",
"\n",
"rank.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = rank.merge(templ, on='Country')\n",
"df.to_csv('./data/citations.csv', index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Number of \"Artificial Intelligence\" Publications from 1996 to 2014"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"from plotly.graph_objs import Annotation, Annotations\n",
"import pandas as pd\n",
"\n",
"df = pd.read_csv('./data/citations.csv')\n",
"\n",
"data=[dict(\n",
" type='choropleth',\n",
" locations=df['Code'],\n",
" z = df['Documents'],\n",
" text = df['Country'],\n",
" colorscale=[[0,\"rgb(5, 10, 172)\"],\n",
" [0.35,\"rgb(40, 60, 190)\"],\n",
" [0.5,\"rgb(70, 100, 245)\"],\n",
" [0.6,\"rgb(90, 120, 245)\"],\n",
" [0.7,\"rgb(106, 137, 247)\"],\n",
" [1,\"rgb(220, 220, 220)\"]],\n",
" autocolorscale=False,\n",
" reversescale=True,\n",
" marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),\n",
" tick0=0,\n",
" zmin=0,\n",
" dtick=1000,\n",
" colorbar=dict(\n",
" autotick=False,\n",
" tickprefix='',\n",
" title='Number of Publications'))]\n",
"\n",
"layout=dict(\n",
" title='Number of \"Artificial Intelligence\" Publications from 1996 to 2014',\n",
" geo=dict(\n",
" showframe=False,\n",
" showcoastlines=False,\n",
" projection=dict(type='Mercator')),\n",
" annotations=Annotations([\n",
" Annotation(\n",
" text='Data source: http://www.scimagojr.com
'\\\n",
" 'Author: Sebastian Raschka
'\\\n",
" '(sebastianraschka.com, @rasbt)',\n",
" xref='paper',\n",
" yref='paper',\n",
" align='left',\n",
" x=0,\n",
" y=0,\n",
" yanchor='bottom',\n",
" showarrow=False)]))\n",
"\n",
"fig = dict(data=data, layout=layout)\n",
"#py.image.save_as({'data': data}, './images/ai_publications_1.svg')\n",
"py.iplot(fig, validate=False, filename='ai-publications-chloropleth-1')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Number of Citations/Publication (self-citations included)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = [dict(\n",
" type='choropleth',\n",
" locations=df['Code'],\n",
" z=df['Citations per Document'],\n",
" text=df['Country'],\n",
" colorscale=[[0,\"rgb(5, 10, 172)\"],\n",
" [0.35,\"rgb(40, 60, 190)\"],\n",
" [0.5,\"rgb(70, 100, 245)\"],\n",
" [0.6,\"rgb(90, 120, 245)\"],\n",
" [0.7,\"rgb(106, 137, 247)\"],\n",
" [1,\"rgb(220, 220, 220)\"]],\n",
" autocolorscale=False,\n",
" reversescale=True,\n",
" marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),\n",
" tick0=0,\n",
" zmin=0,\n",
" dtick=1000,\n",
" colorbar=dict(\n",
" autotick=False,\n",
" tickprefix='',\n",
" title='Number of Citations/Publication
(self-citations included)'))]\n",
"\n",
"layout=dict(\n",
" title='Number Citations per Publication in \"Artificial Intelligence\" Research from 1996 to 2014'\\\n",
" '
(self-citations included)',\n",
" geo=dict(\n",
" showframe=False,\n",
" showcoastlines=False,\n",
" projection=dict(type='Mercator')),\n",
" annotations=Annotations([\n",
" Annotation(\n",
" text='Data source: http://www.scimagojr.com
'\\\n",
" 'Author: Sebastian Raschka
'\\\n",
" '(sebastianraschka.com, @rasbt)',\n",
" yref='paper',\n",
" align='left',\n",
" x=0,\n",
" y=0,\n",
" yanchor='bottom',\n",
" showarrow=False)]))\n",
"\n",
"fig = dict(data=data, layout=layout)\n",
"#py.image.save_as({'data': data}, './images/ai_publications_2.svg')\n",
"py.iplot(fig, validate=False, filename='ai-publications-chloropleth-2')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Number of Citations/Publication
(self-citations excluded)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = [dict(\n",
" type='choropleth',\n",
" locations=df['Code'],\n",
" z=df['Clean Citation/Doc'],\n",
" text=df['Country'],\n",
" colorscale=[[0,\"rgb(5, 10, 172)\"],\n",
" [0.35,\"rgb(40, 60, 190)\"],\n",
" [0.5,\"rgb(70, 100, 245)\"],\n",
" [0.6,\"rgb(90, 120, 245)\"],\n",
" [0.7,\"rgb(106, 137, 247)\"],\n",
" [1,\"rgb(220, 220, 220)\"]],\n",
" autocolorscale=False,\n",
" reversescale=True,\n",
" marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),\n",
" tick0=0,\n",
" zmin=0,\n",
" dtick=1000,\n",
" colorbar=dict(\n",
" autotick=False,\n",
" tickprefix='',\n",
" title='Number of Citations/Publication
(self-citations excluded)'))]\n",
"\n",
"layout=dict(\n",
" title='Number Citations per Publication in \"Artificial Intelligence\" Research from 1996 to 2014'\\\n",
" '
(self-citations excluded)',\n",
" geo=dict(\n",
" showframe=False,\n",
" showcoastlines=False,\n",
" projection=dict(type='Mercator')),\n",
" annotations=Annotations([\n",
" Annotation(\n",
" text='Data source: http://www.scimagojr.com
'\\\n",
" 'Author: Sebastian Raschka
'\\\n",
" '(sebastianraschka.com, @rasbt)',\n",
" yref='paper',\n",
" align='left',\n",
" x=0,\n",
" y=0,\n",
" yanchor='bottom',\n",
" showarrow=False)]))\n",
"\n",
"fig = dict(data=data, layout=layout)\n",
"#py.image.save_as({'data': data}, './images/ai_publications_3.svg')\n",
"py.iplot(fig, validate=False, filename='ai-publications-chloropleth-3')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## AI Publications per Total Publications"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = [dict(\n",
" type='choropleth',\n",
" locations=df['Code'],\n",
" z=df['AI/All Documents'],\n",
" text=df['Country'],\n",
" colorscale=[[0,\"rgb(5, 10, 172)\"],\n",
" [0.35,\"rgb(40, 60, 190)\"],\n",
" [0.5,\"rgb(70, 100, 245)\"],\n",
" [0.6,\"rgb(90, 120, 245)\"],\n",
" [0.7,\"rgb(106, 137, 247)\"],\n",
" [1,\"rgb(220, 220, 220)\"]],\n",
" autocolorscale=False,\n",
" reversescale=True,\n",
" marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),\n",
" tick0=0,\n",
" zmin=0,\n",
" dtick=1000,\n",
" colorbar=dict(\n",
" autotick=False,\n",
" tickprefix='',\n",
" title='AI/all publications in percent'))]\n",
"\n",
"layout=dict(\n",
" title='Percentage of Publications in \"Artificial Intelligence\" Research from 1996 to 2014',\n",
" geo=dict(\n",
" showframe=False,\n",
" showcoastlines=False,\n",
" projection=dict(type='Mercator')),\n",
" annotations=Annotations([\n",
" Annotation(\n",
" text='Data source: http://www.scimagojr.com
'\\\n",
" 'Author: Sebastian Raschka
'\\\n",
" '(sebastianraschka.com, @rasbt)',\n",
" yref='paper',\n",
" align='left',\n",
" x=0,\n",
" y=0,\n",
" yanchor='bottom',\n",
" showarrow=False)]))\n",
"\n",
"fig = dict(data=data, layout=layout)\n",
"#py.image.save_as({'data': data}, './images/ai_publications_4.svg')\n",
"py.iplot(fig, validate=False, filename='ai-publications-chloropleth-4') "
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"### Scatterplots"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import plotly.plotly as py\n",
"from plotly.graph_objs import *\n",
"\n",
"trace0 = Scatter(\n",
" x=df.loc[df['Documents']>=10000, 'Documents'],\n",
" y=df.loc[df['Documents']>=10000, 'Clean Citation/Doc'],\n",
" mode='markers+text',\n",
" text=df['Country'],\n",
" textposition='top center',\n",
" textfont=Font(family='Arial'),\n",
" marker=Marker(size=12),)\n",
"\n",
"\n",
"data = Data([trace0])\n",
"layout = Layout(\n",
" xaxis=XAxis(autorange=True, title='Number of A.I. documents published'),\n",
" yaxis=YAxis(autorange=True, title='Number of citations (self-citations excluded'),\n",
" legend=Legend(\n",
" y=0.5,\n",
" yref='paper',\n",
" font=Font(size=18)),\n",
" annotations=Annotations([\n",
" Annotation(\n",
" text='Data source: http://www.scimagojr.com
'\\\n",
" 'Author: Sebastian Raschka
'\\\n",
" '(sebastianraschka.com, @rasbt)',\n",
" yref='paper',\n",
" align='left',\n",
" x=-0.5,\n",
" y=-0.2,\n",
" yanchor='bottom',\n",
" showarrow=False,\n",
" font={'size': 7}),\n",
" \n",
" Annotation(\n",
" text='*Countries with < 10k publications excluded.',\n",
" yref='paper',\n",
" xref='paper',\n",
" align='center',\n",
" y=1.05,\n",
" x=0.5,\n",
" yanchor='bottom',\n",
" showarrow=False,\n",
" font={'size': 12})]),\n",
" title='A.I. Publications By Country* from 1996-2014',)\n",
"\n",
"fig = Figure(data=data, layout=layout)\n",
"py.iplot(fig, filename='ai-publications-scatter-1')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}