import pandas as pd


def read_google_cvs(gss_url="http://spreadsheets.google.com",
                    gss_format="csv",
                    gss_key="0AuLa_xuSIEvxdERYSGVQWDBTX1NCN19QMXVpb0lhWXc",
                    gss_sheet=0,
                    gss_query="select B,D,E,F,I where (H contains 'GFIF') order by D desc",
                    gss_keep_default_na=False):
    """Read a Google spreadsheet through its query endpoint and return a DataFrame.

    The request URL has the form
    ``<gss_url>/tq?tqx=out:<gss_format>&tq=<gss_query>&key=<gss_key>&gid=<gss_sheet>``
    and the response body is parsed with ``pandas.read_csv``.

    Parameters
    ----------
    gss_url : str
        Base URL of the spreadsheet service.
    gss_format : str
        Value placed after ``tqx=out:``; the response is always parsed as
        CSV, so anything other than ``"csv"`` is unlikely to be useful.
    gss_key : str
        Key identifying the spreadsheet document.
    gss_sheet : int or str
        Sheet identifier, sent as the ``gid`` parameter.
    gss_query : str
        Query text sent as the ``tq`` parameter. It is percent-encoded here,
        so pass it unencoded.
    gss_keep_default_na : bool
        Forwarded to ``pandas.read_csv`` as ``keep_default_na``. With the
        default ``False`` blank cells are kept as empty strings instead of
        being converted to NaN.

    Returns
    -------
    pandas.DataFrame
        The parsed query result.
    """
    import pandas as pd

    # Resolve the URL helpers for whichever Python major version is running.
    try:  # Python 3
        from urllib.request import urlopen
        from urllib.parse import quote
    except ImportError:  # Python 2
        from urllib import urlopen, quote

    # The query contains spaces, quotes and parentheses. Python 2's urlopen
    # quoted the whole URL implicitly; Python 3's does not, so encode the
    # query explicitly. '%' is left alone by Python 2's implicit quoting,
    # so the value is not double-encoded there.
    issn_url = "%s/tq?tqx=out:%s&tq=%s&key=%s&gid=%s" % (
        gss_url,
        gss_format,
        quote(gss_query, safe=""),
        gss_key,
        str(gss_sheet),
    )

    gfile = urlopen(issn_url)
    try:
        return pd.read_csv(gfile, keep_default_na=gss_keep_default_na)
    finally:
        # Release the HTTP connection even if parsing fails.
        gfile.close()

Art\u00edculos

# --- Cell: load the articles sheet --------------------------------------
# Fetch every column of the articles spreadsheet. Blank cells arrive as
# empty strings because read_google_cvs defaults to keep_default_na=False.
df = read_google_cvs(gss_key='0AjqGPI5Q_Ez6dDA3ajhtYVVDOWdBckVhWm1MSFRET1E',
                     gss_query="select *")
df.columns

# --- Cell: keep category-A articles and prepare the year column ---------
# Keep rows that have a real ISSN and whose Colciencias classification
# contains the letter 'A'. The boolean `&` operator replaces the removed
# `pd.np.logical_and`; na=False makes a missing classification count as
# "not A" instead of propagating NaN into the mask.
es_publicada = (
    (df['ISSN'] != '0000-0000')
    & df['Clasificación Colciencias'].str.contains('A', na=False)
)

# Label blank cells as 'Desconocido' (unknown), then discard articles whose
# group is unknown.
publicadas = df[es_publicada].replace('', 'Desconocido')
publicadas = publicadas[publicadas['Grupo'] != 'Desconocido'].reset_index(drop=True)

# Convert the year to an integer column in one vectorized step. Values that
# are not numeric (the sheet's 'null' marker, or 'Desconocido' introduced
# above) become 0, so the later "year > threshold" filter drops them.
# The original element-wise loop used chained-index assignment and raised
# ValueError on any non-numeric value other than 'null'.
publicadas['Año'] = (
    pd.to_numeric(publicadas['Año'], errors='coerce')
    .fillna(0)
    .astype(int)
)
# --- Cell: articles per author for each group ---------------------------
import datetime

# Keep articles whose year is strictly greater than (current year - 2).
anyo_minimo = datetime.datetime.now().year - 2
publicadas5 = publicadas[publicadas['Año'] > anyo_minimo].reset_index(drop=True)
# The original cell also rebinds `publicadas` to the filtered frame; that
# side effect is preserved here.
publicadas = publicadas5

columnas = ['Grupo', 'Articulos', 'Autores', 'Articulos/(autores grupo)']

# Collect one record per group and build the frame once, instead of growing
# it row by row with DataFrame.append (quadratic, and removed in pandas 2.0).
# This also removes the dependency on `np`, which the notebook never imports.
registros = []
for grupo in publicadas5['Grupo'].value_counts().index:
    articulos_grupo = publicadas5[publicadas5['Grupo'] == grupo]

    # Distinct UdeA authors of the group: each cell holds names separated
    # by '; '.
    autores_grupo = set()
    for autores in articulos_grupo['Autores de la UdeA'].dropna().unique():
        autores_grupo.update(autores.split('; '))

    n_articulos = len(articulos_grupo)
    n_autores = len(autores_grupo)
    registros.append({
        'Grupo': grupo,
        'Articulos': n_articulos,
        'Autores': n_autores,
        # Guard against a group whose author column is entirely missing.
        'Articulos/(autores grupo)': (
            round(float(n_articulos) / n_autores, 2) if n_autores else float('nan')
        ),
    })

# Passing `columns` keeps the expected layout even when no article matches.
resultados = pd.DataFrame(registros, columns=columnas)

# DataFrame.sort(columns=...) no longer exists; sort_values is its successor.
resultados.sort_values(by='Articulos/(autores grupo)', ascending=False).reset_index(drop=True)
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GrupoArticulosAutoresArticulos/(autores grupo)
0 GMC: Grupo de Materia Condensada 43 1 43
1 GES: Estado S\u00f3lido 21 6 3.5
2 GOF: Grupo \u00d3ptica y Fot\u00f3nica 9 3 3
3 GFIF: Grupo de Fenomenolog\u00eda de Interacciones ... 13 5 2.6
4 MS: Magnetismo y Simulaci\u00f3n 6 3 2
5 GFAM: Grupo de F\u00edsica At\u00f3mica y Molecular 8 4 2
6 FACOM: F\u00edsica y Astrof\u00edsica Computacional 3 2 1.5
7 GES: Estado S\u00f3lido; GMC: Grupo de Materia Cond... 2 2 1
\n", "
" ], "output_type": "pyout", "prompt_number": 4, "text": [ " Grupo Articulos Autores \\\n", "0 GMC: Grupo de Materia Condensada 43 1 \n", "1 GES: Estado S\u00f3lido 21 6 \n", "2 GOF: Grupo \u00d3ptica y Fot\u00f3nica 9 3 \n", "3 GFIF: Grupo de Fenomenolog\u00eda de Interacciones ... 13 5 \n", "4 MS: Magnetismo y Simulaci\u00f3n 6 3 \n", "5 GFAM: Grupo de F\u00edsica At\u00f3mica y Molecular 8 4 \n", "6 FACOM: F\u00edsica y Astrof\u00edsica Computacional 3 2 \n", "7 GES: Estado S\u00f3lido; GMC: Grupo de Materia Cond... 2 2 \n", "\n", " Articulos/(autores grupo) \n", "0 43 \n", "1 3.5 \n", "2 3 \n", "3 2.6 \n", "4 2 \n", "5 2 \n", "6 1.5 \n", "7 1 " ] } ], "prompt_number": 4 } ], "metadata": {} } ] }