{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "edab77df", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from datetime import datetime\n", "import os.path\n", "import requests\n", "import json" ] }, { "cell_type": "markdown", "id": "309f2a25", "metadata": {}, "source": [ "# Data cleaning" ] }, { "cell_type": "markdown", "id": "c837b608", "metadata": {}, "source": [ "## Processing `paper_references.csv`\n", "Null values in the DOI column are represented as different strings, so we remove all values that do not look like a DOI (do not start with \"10.\")" ] }, { "cell_type": "code", "execution_count": 17, "id": "2b3ca1cd", "metadata": {}, "outputs": [], "source": [ "papers_df = pd.read_csv(\"raw-paper_references.csv\")" ] }, { "cell_type": "code", "execution_count": 18, "id": "f58ca714", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDvalidNo_de_Refnew_reffilenameDOITitleYearJournalPagesIssueAbstractAuthorsVolumeCorresponding_authorCountryCountry_name
011111-s2.0-S092633731000086X-main10.1016/j.apcatb.2010.02.030\"Effect of silver doping on the TiO2 for photo...2010APPLIED CATALYSIS B-ENVIRONMENTAL239NaN\"Pure TiO2 and various silver-enriched TiO2 po...\"K. Ko\\u010d\\u00ed, K. Mat\\u011bj\\u016f, L. Ob...96kamila.koci@vsb.cz\"CZ,CZ,CZ,CZ,CZ,CZ,CZ,CZ,CZ\"\"Czech Republic,Czech Republic,Czech Republic,...
1212210.1016@j.cattod.2009.07.06710.1016/j.cattod.2009.07.067\"CO2 reforming into fuel using TiO2 photocatal...2009CATALYSIS TODAY341NaN\"It was previously reported that CO2 could be ...\"A. Nishimura, N. Komatsu, G. Mitsui, M. Hirot...148nisimura@mach.mie-u.ac.jp\"JP,JP,JP,JP,AU\"\"Japan,Japan,Japan,Japan,Australia\"
231331-s2.0-S0926860X05006125-main10.1016/j.apcata.2005.08.021\"Photo reduction of CO2 to methanol using opti...2005APPLIED CATALYSIS A-GENERAL194NaN\"Greenhouse gases such as CO2 are the primary ...\"J.C.S. Wu, H-M. Lin ,C-L. Lai\"269cswu@ntu.edu.tw\"TW,TW,TW\"\"Taiwan,Taiwan,Taiwan\"
3414427362718_Photo_reduction_of_CO2_to_methanol_vi...10.1155/S1110662X05000176\"Photo reduction of CO2 to methanol via TiO2 p...2005INTERNATIONAL JOURNAL OF PHOTOENERGY115NaN\"Greenhouse gas such as CO2 is the primary cau...\"J.C.S. Wu, H-M. Lin ,C-L. Lai\"7cswu@ntu.edu.tw\"TW,TW\"\"Taiwan,Taiwan\"
4515510.1023@A@101140332030110.1023/A:1011403320301\"A new type of photocatalysis initiated by pho...2000CATALYSIS SURVEYS FROM JAPAN1072\"ZrO2 has been found to be an effective photoc...\"S. Yoshida, Y. Kohno\"4artleafs.admin@artleafs.eu\"JP,JP\"\"Japan,Japan\"
5616610.1016@S0360-5442(98)00070-X10.1016/S0360-5442(98)00070-X\"Photocatalytic reduction of CO2 using TiO2 po...1999ENERGY21NaN\"At present, carbon dioxide is considered the ...\"S. Kaneco, H. Kurimoto, Y. Shimizu, K. Ohta ,...24kaneco@chem.mie-u.ac.jp\"JP,JP,JP,JP\"\"Japan,Japan,Japan,Japan\"
6717710.1016@j.cattod.2009.07.08110.1016/j.cattod.2009.07.081\"Photocatalytic reduction of CO2 with H2O on P...2009CATALYSIS TODAY335NaN\"A gas–solid heterogeneous system for so...\"Q-H. Zhang, W-D. Han, Y-H. Hong ,J-G. Yu\"148qhzhang@ecust.edu.cn\"CN,CN,CN,CN\"\"China,China,China,China\"
7818810.1016@j.jcis.2010.12.03410.1016/j.jcis.2010.12.034\"Photocatalytic reduction of CO2 in methanol t...2011JOURNAL OF COLLOID AND INTERFACE SCIENCE257NaN\"Photocatalytic reduction of CO(2) on CuO-TiO(...\"S. Qin, F. Xin, Y. Liu, X. Yin ,W. Ma \"356xinf@tju.edu.cn\"CN,CN,CN,CN,CN\"\"China,China,China,China,China\"
8919910.1016@j.solmat.2007.06.00310.1016/j.solmat.2007.06.003\"Photoreduction of carbon dioxide with H2 and ...2007SOLAR ENERGY MATERIALS AND SOLAR CELLS1765NaN\"The photocatalytic reduction of carbon dioxid...\"C-C. Lo, C-H. Hung, C-S. Yuan ,J-F. Wu\"91ycsngi@mail.nsysu.edu.tw\"TW,TW,TW,TW\"\"Taiwan,Taiwan,Taiwan,Taiwan\"
9101101010.1007@s10563-009-9065-910.1007/s10563-009-9065-9\"Photocatalytic reduction of greenhouse gas CO...2009CATALYSIS SURVEYS FROM ASIA30NaN\"Sun is the Earth’s ultimate and inexhau...\"J.C.S. Wu\"13cswu@ntu.edu.tw\"TW\"\"Taiwan\"
10111111110.1039@B400326H10.1039/B400326H\"Improving photoreduction of CO2 with homogene...2004CHEMICAL COMMUNICATIONS1234NaN\"Significantly improved photoconversion Of CO2...\"P. Pathak, M.J. Meziani, Y. Li, L.T. Cureton ...0syaping@clemson.edu\"US,US,US,US,US\"\"USA,USA,USA,USA,USA\"
11121121210.1021@ja910091z10.1021/ja910091z\"Efficient and Clean Photoreduction of CO2 to ...2010JOURNAL OF THE AMERICAN CHEMICAL SOCIETY2132NaN\"A hybrid enzyme−nanoparticle system is ...\"T.W. Woolerton, S. Sheard, E. Reisner, E. Pie...132fraser.armstrong@chem.ox.ac.uk\"GB,GB,GB,GB,GB,GB\"\"UK,UK,UK,UK,UK,UK\"
12131131310.1016@j.carbon.2006.11.02810.1016/j.carbon.2006.11.028\"Preparation of multi-walled carbon nanotube s...2007CARBON717NaN\"Multi-walled carbon nanotube (MWCNT) supporte...\"X-H. Xia, Z-J. Jia, Y. Yu, Y. Liang, Z. Wang ...45xiaxh@phy.ccnu.edu.cn\"CN,CN,CN,CN,CN,CN\"\"China,China,China,China,China,China\"
13141141410.1021@ja101318k10.1021/ja101318k\"Artificial Photosynthesis over Crystalline Ti...2010JOURNAL OF THE AMERICAN CHEMICAL SOCIETY8398NaN\"The mechanism of photocatalytic conversion of...\"C-C. Yang, Y-H. Yu, B. van der Linden, J.C.S....132G.Mul@tnw.utwente.nl\"NL,NL,NL,NL,NL\"\"Netherlands,Netherlands,Netherlands,Netherlan...
14151151510.1021@ja200804h10.1021/ja200804h\"Carbon Nanoparticles as Visible-light Photoca...2011JOURNAL OF THE AMERICAN CHEMICAL SOCIETY4754NaN\"Increasing atmospheric CO2 levels have genera...\"L. Cao, S. Sahu, P. Anilkumar, C.E. Bunker, J...133christopher.bunker@wpafb.af.mil\"US,US,US,US,US,US,US,US,US,US\"\"USA,USA,USA,USA,USA,USA,USA,USA,USA,USA\"
15161161610.1016@j.cattod.2010.08.026.pdf10.1016/j.cattod.2010.08.026\"Comparison of te pure TiO2 and kaolinite/TiO2...2011CATALYSIS TODAY105NaN\"The kaolinite/TiO2 composite was prepared usi...\"K. Ko\\u010d\\u00ed, V. Mat\\u011bjka, P. Kov\\u0...161kamila.koci@vsb.cz\"CZ,CZ,CZ,CZ,CZ\"\"Czech Republic,Czech Republic,Czech Republic,...
16171171710.1007@s11244-007-9022-710.1007/s11244-007-9022-7\"Application of optical-fiber photo for CO2 ph...2008TOPICS IN CATALYSIS131NaN\"An optical-fiber photoreactor, comprised of 2...\"J.C.S. Wu, T-H. Wu, T. Chu, H. Huang ,D. Tsai\"47cswu@ntu.edu.tw\"TW,TW,TW,TW,TW\"\"Taiwan,Taiwan,Taiwan,Taiwan,Taiwan\"
17181181810.1039@C0EE00780C10.1039/C0EE00780C\"CO2 photoreduction at enzyme-modified metal o...2011ENERGY & ENVIRONMENTAL SCIENCE2393NaN\"A model system for photoreduction of CO2 to C...\"T.W. Woolerton, S. Sheard. E. Pierce S. W. R...4fraser.armstrong@chem.ox.ac.uk\"GB,GB,US,US,GB\"\"UK,UK,USA,USA,UK\"
18191191910.1021@ja108791u10.1021/ja108791u\"Role of water and carbonates in photocatalyti...2011JOURNAL OF THE AMERICAN CHEMICAL SOCIETY3964NaN\"Using the electron paramagnetic resonance tec...\"N.M. Dimitrijevic, B.K. Vijayan, O.G. Poluekt...133dimitrijevic@anl.gov\"US,US,US,US,US,US,US\"\"USA,USA,USA,USA,USA,USA,USA\"
19201202010.1016@S1872-20671060199-410.1016/S1872-2067(10)60199-4\"Wavelenght effect on photocatalytic reduction...2011CHINESE JOURNAL OF CATALYSIS812NaN\"Photocatalytic reduction of CO2 by water was ...\"K. Koci, K. Zatloukalova, L. Obalova, S. Krej...32kamila.koci@vsb.cz\"CZ,CZ,CZ,CZ,CZ,CZ,CZ,CZ\"\"Czech Republic,Czech Republic,Czech Republic,...
20211212110.1016@j.fuproc.2010.09.00710.1016/j.fuproc.2010.09.007\"Photoinduced activation of CO2 on TiO2 surfac...2011FUEL PROCESSING TECHNOLOGY805NaN\"Chemical processes that utilize CO2 emissions...\"V.P. Indrakanti, J.D. Kubicki ,H.H. Schobert\"92pradeep.iv@gmail.com\"US,US,US\"\"USA,USA,USA\"
21221222210.1016@j.jclepro.2009.02.01610.1016/j.jclepro.2009.02.016\"Photo-catalytic reduction of carbon dioxide w...2009JOURNAL OF CLEANER PRODUCTION1025NaN\"CO2 is photo-catalytically reduced to produce...\"Z. Zhao, J.Fan, M. Xie, Z. Wang\"17zzh1972129@163.com\"CN,CN,CN,CN\"\"China,China,China,China\"
22231232310.1021@jz900003210.1021/jz9000032\"Visible Light Photoreduction of CO2 Using CdS...2010JOURNAL OF PHYSICAL CHEMISTRY LETTERS48NaN\"A series of CdSe quantum dot (QD)-sensitized ...\"C. Wang, R.L. Thompson, J. Baltrus ,C. Matranga\"1congjun.wang@pp.netl.doe.gov\"US,US,US,US\"\"USA,USA,USA,USA\"
23241242410.1016@j.apcatb.2009.01.01010.1016/j.apcatb.2009.01.010\"Effect of TiO2 particle size on the photocata...2009APPLIED CATALYSIS B-ENVIRONMENTAL494NaN\"Pure TiO2 anatase particles with a crystallit...\"K.Koci, L. Obalova, L. Matejova, D.Placa, Z. ...89kamila.koci@vsb.cz\"CZ,CZ,CZ,CZ,CZ,CZ,CZ\"\"Czech Republic,Czech Republic,Czech Republic,...
24251252510.1007@s11244-006-0100-z10.1007/s11244-006-0100-z\"Dye sensitized CO2 reduction over pure and pl...2007TOPICS IN CATALYSIS5234\"TiO2 thin and thick films promoted with plati...\"O. Ozcan, F. Yukruk, E.U. Akkaya ,D. Uner\"44uner@metu.edu.tr\"TR,TR,TR,TR\"\"Turkey,Turkey,Turkey,Turkey\"
25261262610.1016@j.apcatb.2010.08.01510.1016/j.apcatb.2010.08.015\"Photocatalytic reduction of CO2 with H2O on m...2010APPLIED CATALYSIS B-ENVIRONMENTAL386NaN\"Photoreduction of CO2 to hydrocarbons is a su...\"Y. Li, W-N. Wang, Z. Zhan, M-H. Woo, M-H. Wu,...100liying@uwm.edu\"US,US,US,US,US,US\"\"USA,USA,USA,USA,USA,USA\"
26271272710.1016@S1010-6030(99)00113-610.1016/S1010-6030(99)00113-6\"Photo-enhanced reduction of carbon dioxide wi...1999JOURNAL OF PHOTOCHEMISTRY AND PHOTOBIOLOGY A-C...117NaN\"Photoreduction of gaseous carbon dioxide to c...\"Y. Kohno, H. Hayashi, S. Takenaka, T. Tanaka,...126artleafs.admin@artleafs.eu\"JP,JP,JP,JP,JP,JP\"\"Japan,Japan,Japan,Japan,Japan,Japan\"
272812828Kinetic_study_of_photocatalytic_reduction_of_C...\"Kinetic study of photocatalytic reduction of ...2010CHEMICAL AND PROCESS ENGINEERING-INZYNIERIA CH...395NaN\"Time dependences of photocatalytic reduction ...\"K.Koci, L. Obalova ,O. Solcova\"31kamila.koci@vsb.cz\"CZ,CZ,CZ\"\"Czech Republic,Czech Republic,Czech Republic\"
28291292910.2478@v10026-010-0022-110.2478/v10026-010-0022-1\"Photoreduction of carbon dioxide with hydroge...2010POLISH JOURNAL OF CHEMICAL TECHNOLOGY13\"The photocatalytic reduction of carbon dioxid...\"I. Kocemba, J. Nadajczyk, J. Gralski ,M.I. Sz...12ikocemba@p.lodz.pl\"PL,PL,PL,PL\"\"Poland,Poland,Poland,Poland\"
29301303010.1016@j.catcom.2008.04.00410.1016/j.catcom.2008.04.004\"Photoreduction of CO2 over Ruthenium dye-sens...2008CATALYSIS COMMUNICATIONS2073NaN\"Metal doped TiO2 catalyst sensitized with RuI...\"T-V. Nguyen, J.C.S. Wu ,C-H. Chiou\"9cswu@ntu.edu.tw\"TW,TW,TW\"\"Taiwan,Taiwan,Taiwan\"
\n", "
" ], "text/plain": [ " ID valid No_de_Ref new_ref \\\n", "0 1 1 1 1 \n", "1 2 1 2 2 \n", "2 3 1 3 3 \n", "3 4 1 4 4 \n", "4 5 1 5 5 \n", "5 6 1 6 6 \n", "6 7 1 7 7 \n", "7 8 1 8 8 \n", "8 9 1 9 9 \n", "9 10 1 10 10 \n", "10 11 1 11 11 \n", "11 12 1 12 12 \n", "12 13 1 13 13 \n", "13 14 1 14 14 \n", "14 15 1 15 15 \n", "15 16 1 16 16 \n", "16 17 1 17 17 \n", "17 18 1 18 18 \n", "18 19 1 19 19 \n", "19 20 1 20 20 \n", "20 21 1 21 21 \n", "21 22 1 22 22 \n", "22 23 1 23 23 \n", "23 24 1 24 24 \n", "24 25 1 25 25 \n", "25 26 1 26 26 \n", "26 27 1 27 27 \n", "27 28 1 28 28 \n", "28 29 1 29 29 \n", "29 30 1 30 30 \n", "\n", " filename \\\n", "0 1-s2.0-S092633731000086X-main \n", "1 10.1016@j.cattod.2009.07.067 \n", "2 1-s2.0-S0926860X05006125-main \n", "3 27362718_Photo_reduction_of_CO2_to_methanol_vi... \n", "4 10.1023@A@1011403320301 \n", "5 10.1016@S0360-5442(98)00070-X \n", "6 10.1016@j.cattod.2009.07.081 \n", "7 10.1016@j.jcis.2010.12.034 \n", "8 10.1016@j.solmat.2007.06.003 \n", "9 10.1007@s10563-009-9065-9 \n", "10 10.1039@B400326H \n", "11 10.1021@ja910091z \n", "12 10.1016@j.carbon.2006.11.028 \n", "13 10.1021@ja101318k \n", "14 10.1021@ja200804h \n", "15 10.1016@j.cattod.2010.08.026.pdf \n", "16 10.1007@s11244-007-9022-7 \n", "17 10.1039@C0EE00780C \n", "18 10.1021@ja108791u \n", "19 10.1016@S1872-20671060199-4 \n", "20 10.1016@j.fuproc.2010.09.007 \n", "21 10.1016@j.jclepro.2009.02.016 \n", "22 10.1021@jz9000032 \n", "23 10.1016@j.apcatb.2009.01.010 \n", "24 10.1007@s11244-006-0100-z \n", "25 10.1016@j.apcatb.2010.08.015 \n", "26 10.1016@S1010-6030(99)00113-6 \n", "27 Kinetic_study_of_photocatalytic_reduction_of_C... 
\n", "28 10.2478@v10026-010-0022-1 \n", "29 10.1016@j.catcom.2008.04.004 \n", "\n", " DOI \\\n", "0 10.1016/j.apcatb.2010.02.030 \n", "1 10.1016/j.cattod.2009.07.067 \n", "2 10.1016/j.apcata.2005.08.021 \n", "3 10.1155/S1110662X05000176 \n", "4 10.1023/A:1011403320301 \n", "5 10.1016/S0360-5442(98)00070-X \n", "6 10.1016/j.cattod.2009.07.081 \n", "7 10.1016/j.jcis.2010.12.034 \n", "8 10.1016/j.solmat.2007.06.003 \n", "9 10.1007/s10563-009-9065-9 \n", "10 10.1039/B400326H \n", "11 10.1021/ja910091z \n", "12 10.1016/j.carbon.2006.11.028 \n", "13 10.1021/ja101318k \n", "14 10.1021/ja200804h \n", "15 10.1016/j.cattod.2010.08.026 \n", "16 10.1007/s11244-007-9022-7 \n", "17 10.1039/C0EE00780C \n", "18 10.1021/ja108791u \n", "19 10.1016/S1872-2067(10)60199-4 \n", "20 10.1016/j.fuproc.2010.09.007 \n", "21 10.1016/j.jclepro.2009.02.016 \n", "22 10.1021/jz9000032 \n", "23 10.1016/j.apcatb.2009.01.010 \n", "24 10.1007/s11244-006-0100-z \n", "25 10.1016/j.apcatb.2010.08.015 \n", "26 10.1016/S1010-6030(99)00113-6 \n", "27 \n", "28 10.2478/v10026-010-0022-1 \n", "29 10.1016/j.catcom.2008.04.004 \n", "\n", " Title Year \\\n", "0 \"Effect of silver doping on the TiO2 for photo... 2010 \n", "1 \"CO2 reforming into fuel using TiO2 photocatal... 2009 \n", "2 \"Photo reduction of CO2 to methanol using opti... 2005 \n", "3 \"Photo reduction of CO2 to methanol via TiO2 p... 2005 \n", "4 \"A new type of photocatalysis initiated by pho... 2000 \n", "5 \"Photocatalytic reduction of CO2 using TiO2 po... 1999 \n", "6 \"Photocatalytic reduction of CO2 with H2O on P... 2009 \n", "7 \"Photocatalytic reduction of CO2 in methanol t... 2011 \n", "8 \"Photoreduction of carbon dioxide with H2 and ... 2007 \n", "9 \"Photocatalytic reduction of greenhouse gas CO... 2009 \n", "10 \"Improving photoreduction of CO2 with homogene... 2004 \n", "11 \"Efficient and Clean Photoreduction of CO2 to ... 2010 \n", "12 \"Preparation of multi-walled carbon nanotube s... 
2007 \n", "13 \"Artificial Photosynthesis over Crystalline Ti... 2010 \n", "14 \"Carbon Nanoparticles as Visible-light Photoca... 2011 \n", "15 \"Comparison of te pure TiO2 and kaolinite/TiO2... 2011 \n", "16 \"Application of optical-fiber photo for CO2 ph... 2008 \n", "17 \"CO2 photoreduction at enzyme-modified metal o... 2011 \n", "18 \"Role of water and carbonates in photocatalyti... 2011 \n", "19 \"Wavelenght effect on photocatalytic reduction... 2011 \n", "20 \"Photoinduced activation of CO2 on TiO2 surfac... 2011 \n", "21 \"Photo-catalytic reduction of carbon dioxide w... 2009 \n", "22 \"Visible Light Photoreduction of CO2 Using CdS... 2010 \n", "23 \"Effect of TiO2 particle size on the photocata... 2009 \n", "24 \"Dye sensitized CO2 reduction over pure and pl... 2007 \n", "25 \"Photocatalytic reduction of CO2 with H2O on m... 2010 \n", "26 \"Photo-enhanced reduction of carbon dioxide wi... 1999 \n", "27 \"Kinetic study of photocatalytic reduction of ... 2010 \n", "28 \"Photoreduction of carbon dioxide with hydroge... 2010 \n", "29 \"Photoreduction of CO2 over Ruthenium dye-sens... 
2008 \n", "\n", " Journal Pages Issue \\\n", "0 APPLIED CATALYSIS B-ENVIRONMENTAL 239 NaN \n", "1 CATALYSIS TODAY 341 NaN \n", "2 APPLIED CATALYSIS A-GENERAL 194 NaN \n", "3 INTERNATIONAL JOURNAL OF PHOTOENERGY 115 NaN \n", "4 CATALYSIS SURVEYS FROM JAPAN 107 2 \n", "5 ENERGY 21 NaN \n", "6 CATALYSIS TODAY 335 NaN \n", "7 JOURNAL OF COLLOID AND INTERFACE SCIENCE 257 NaN \n", "8 SOLAR ENERGY MATERIALS AND SOLAR CELLS 1765 NaN \n", "9 CATALYSIS SURVEYS FROM ASIA 30 NaN \n", "10 CHEMICAL COMMUNICATIONS 1234 NaN \n", "11 JOURNAL OF THE AMERICAN CHEMICAL SOCIETY 2132 NaN \n", "12 CARBON 717 NaN \n", "13 JOURNAL OF THE AMERICAN CHEMICAL SOCIETY 8398 NaN \n", "14 JOURNAL OF THE AMERICAN CHEMICAL SOCIETY 4754 NaN \n", "15 CATALYSIS TODAY 105 NaN \n", "16 TOPICS IN CATALYSIS 131 NaN \n", "17 ENERGY & ENVIRONMENTAL SCIENCE 2393 NaN \n", "18 JOURNAL OF THE AMERICAN CHEMICAL SOCIETY 3964 NaN \n", "19 CHINESE JOURNAL OF CATALYSIS 812 NaN \n", "20 FUEL PROCESSING TECHNOLOGY 805 NaN \n", "21 JOURNAL OF CLEANER PRODUCTION 1025 NaN \n", "22 JOURNAL OF PHYSICAL CHEMISTRY LETTERS 48 NaN \n", "23 APPLIED CATALYSIS B-ENVIRONMENTAL 494 NaN \n", "24 TOPICS IN CATALYSIS 523 4 \n", "25 APPLIED CATALYSIS B-ENVIRONMENTAL 386 NaN \n", "26 JOURNAL OF PHOTOCHEMISTRY AND PHOTOBIOLOGY A-C... 117 NaN \n", "27 CHEMICAL AND PROCESS ENGINEERING-INZYNIERIA CH... 395 NaN \n", "28 POLISH JOURNAL OF CHEMICAL TECHNOLOGY 1 3 \n", "29 CATALYSIS COMMUNICATIONS 2073 NaN \n", "\n", " Abstract \\\n", "0 \"Pure TiO2 and various silver-enriched TiO2 po... \n", "1 \"It was previously reported that CO2 could be ... \n", "2 \"Greenhouse gases such as CO2 are the primary ... \n", "3 \"Greenhouse gas such as CO2 is the primary cau... \n", "4 \"ZrO2 has been found to be an effective photoc... \n", "5 \"At present, carbon dioxide is considered the ... \n", "6 \"A gas–solid heterogeneous system for so... \n", "7 \"Photocatalytic reduction of CO(2) on CuO-TiO(... \n", "8 \"The photocatalytic reduction of carbon dioxid... 
\n", "9 \"Sun is the Earth’s ultimate and inexhau... \n", "10 \"Significantly improved photoconversion Of CO2... \n", "11 \"A hybrid enzyme−nanoparticle system is ... \n", "12 \"Multi-walled carbon nanotube (MWCNT) supporte... \n", "13 \"The mechanism of photocatalytic conversion of... \n", "14 \"Increasing atmospheric CO2 levels have genera... \n", "15 \"The kaolinite/TiO2 composite was prepared usi... \n", "16 \"An optical-fiber photoreactor, comprised of 2... \n", "17 \"A model system for photoreduction of CO2 to C... \n", "18 \"Using the electron paramagnetic resonance tec... \n", "19 \"Photocatalytic reduction of CO2 by water was ... \n", "20 \"Chemical processes that utilize CO2 emissions... \n", "21 \"CO2 is photo-catalytically reduced to produce... \n", "22 \"A series of CdSe quantum dot (QD)-sensitized ... \n", "23 \"Pure TiO2 anatase particles with a crystallit... \n", "24 \"TiO2 thin and thick films promoted with plati... \n", "25 \"Photoreduction of CO2 to hydrocarbons is a su... \n", "26 \"Photoreduction of gaseous carbon dioxide to c... \n", "27 \"Time dependences of photocatalytic reduction ... \n", "28 \"The photocatalytic reduction of carbon dioxid... \n", "29 \"Metal doped TiO2 catalyst sensitized with RuI... \n", "\n", " Authors Volume \\\n", "0 \"K. Ko\\u010d\\u00ed, K. Mat\\u011bj\\u016f, L. Ob... 96 \n", "1 \"A. Nishimura, N. Komatsu, G. Mitsui, M. Hirot... 148 \n", "2 \"J.C.S. Wu, H-M. Lin ,C-L. Lai\" 269 \n", "3 \"J.C.S. Wu, H-M. Lin ,C-L. Lai\" 7 \n", "4 \"S. Yoshida, Y. Kohno\" 4 \n", "5 \"S. Kaneco, H. Kurimoto, Y. Shimizu, K. Ohta ,... 24 \n", "6 \"Q-H. Zhang, W-D. Han, Y-H. Hong ,J-G. Yu\" 148 \n", "7 \"S. Qin, F. Xin, Y. Liu, X. Yin ,W. Ma \" 356 \n", "8 \"C-C. Lo, C-H. Hung, C-S. Yuan ,J-F. Wu\" 91 \n", "9 \"J.C.S. Wu\" 13 \n", "10 \"P. Pathak, M.J. Meziani, Y. Li, L.T. Cureton ... 0 \n", "11 \"T.W. Woolerton, S. Sheard, E. Reisner, E. Pie... 132 \n", "12 \"X-H. Xia, Z-J. Jia, Y. Yu, Y. Liang, Z. Wang ... 45 \n", "13 \"C-C. Yang, Y-H. 
Yu, B. van der Linden, J.C.S.... 132 \n", "14 \"L. Cao, S. Sahu, P. Anilkumar, C.E. Bunker, J... 133 \n", "15 \"K. Ko\\u010d\\u00ed, V. Mat\\u011bjka, P. Kov\\u0... 161 \n", "16 \"J.C.S. Wu, T-H. Wu, T. Chu, H. Huang ,D. Tsai\" 47 \n", "17 \"T.W. Woolerton, S. Sheard. E. Pierce S. W. R... 4 \n", "18 \"N.M. Dimitrijevic, B.K. Vijayan, O.G. Poluekt... 133 \n", "19 \"K. Koci, K. Zatloukalova, L. Obalova, S. Krej... 32 \n", "20 \"V.P. Indrakanti, J.D. Kubicki ,H.H. Schobert\" 92 \n", "21 \"Z. Zhao, J.Fan, M. Xie, Z. Wang\" 17 \n", "22 \"C. Wang, R.L. Thompson, J. Baltrus ,C. Matranga\" 1 \n", "23 \"K.Koci, L. Obalova, L. Matejova, D.Placa, Z. ... 89 \n", "24 \"O. Ozcan, F. Yukruk, E.U. Akkaya ,D. Uner\" 44 \n", "25 \"Y. Li, W-N. Wang, Z. Zhan, M-H. Woo, M-H. Wu,... 100 \n", "26 \"Y. Kohno, H. Hayashi, S. Takenaka, T. Tanaka,... 126 \n", "27 \"K.Koci, L. Obalova ,O. Solcova\" 31 \n", "28 \"I. Kocemba, J. Nadajczyk, J. Gralski ,M.I. Sz... 12 \n", "29 \"T-V. Nguyen, J.C.S. Wu ,C-H. Chiou\" 9 \n", "\n", " Corresponding_author Country \\\n", "0 kamila.koci@vsb.cz \"CZ,CZ,CZ,CZ,CZ,CZ,CZ,CZ,CZ\" \n", "1 nisimura@mach.mie-u.ac.jp \"JP,JP,JP,JP,AU\" \n", "2 cswu@ntu.edu.tw \"TW,TW,TW\" \n", "3 cswu@ntu.edu.tw \"TW,TW\" \n", "4 artleafs.admin@artleafs.eu \"JP,JP\" \n", "5 kaneco@chem.mie-u.ac.jp \"JP,JP,JP,JP\" \n", "6 qhzhang@ecust.edu.cn \"CN,CN,CN,CN\" \n", "7 xinf@tju.edu.cn \"CN,CN,CN,CN,CN\" \n", "8 ycsngi@mail.nsysu.edu.tw \"TW,TW,TW,TW\" \n", "9 cswu@ntu.edu.tw \"TW\" \n", "10 syaping@clemson.edu \"US,US,US,US,US\" \n", "11 fraser.armstrong@chem.ox.ac.uk \"GB,GB,GB,GB,GB,GB\" \n", "12 xiaxh@phy.ccnu.edu.cn \"CN,CN,CN,CN,CN,CN\" \n", "13 G.Mul@tnw.utwente.nl \"NL,NL,NL,NL,NL\" \n", "14 christopher.bunker@wpafb.af.mil \"US,US,US,US,US,US,US,US,US,US\" \n", "15 kamila.koci@vsb.cz \"CZ,CZ,CZ,CZ,CZ\" \n", "16 cswu@ntu.edu.tw \"TW,TW,TW,TW,TW\" \n", "17 fraser.armstrong@chem.ox.ac.uk \"GB,GB,US,US,GB\" \n", "18 dimitrijevic@anl.gov \"US,US,US,US,US,US,US\" \n", "19 
kamila.koci@vsb.cz \"CZ,CZ,CZ,CZ,CZ,CZ,CZ,CZ\" \n", "20 pradeep.iv@gmail.com \"US,US,US\" \n", "21 zzh1972129@163.com \"CN,CN,CN,CN\" \n", "22 congjun.wang@pp.netl.doe.gov \"US,US,US,US\" \n", "23 kamila.koci@vsb.cz \"CZ,CZ,CZ,CZ,CZ,CZ,CZ\" \n", "24 uner@metu.edu.tr \"TR,TR,TR,TR\" \n", "25 liying@uwm.edu \"US,US,US,US,US,US\" \n", "26 artleafs.admin@artleafs.eu \"JP,JP,JP,JP,JP,JP\" \n", "27 kamila.koci@vsb.cz \"CZ,CZ,CZ\" \n", "28 ikocemba@p.lodz.pl \"PL,PL,PL,PL\" \n", "29 cswu@ntu.edu.tw \"TW,TW,TW\" \n", "\n", " Country_name \n", "0 \"Czech Republic,Czech Republic,Czech Republic,... \n", "1 \"Japan,Japan,Japan,Japan,Australia\" \n", "2 \"Taiwan,Taiwan,Taiwan\" \n", "3 \"Taiwan,Taiwan\" \n", "4 \"Japan,Japan\" \n", "5 \"Japan,Japan,Japan,Japan\" \n", "6 \"China,China,China,China\" \n", "7 \"China,China,China,China,China\" \n", "8 \"Taiwan,Taiwan,Taiwan,Taiwan\" \n", "9 \"Taiwan\" \n", "10 \"USA,USA,USA,USA,USA\" \n", "11 \"UK,UK,UK,UK,UK,UK\" \n", "12 \"China,China,China,China,China,China\" \n", "13 \"Netherlands,Netherlands,Netherlands,Netherlan... \n", "14 \"USA,USA,USA,USA,USA,USA,USA,USA,USA,USA\" \n", "15 \"Czech Republic,Czech Republic,Czech Republic,... \n", "16 \"Taiwan,Taiwan,Taiwan,Taiwan,Taiwan\" \n", "17 \"UK,UK,USA,USA,UK\" \n", "18 \"USA,USA,USA,USA,USA,USA,USA\" \n", "19 \"Czech Republic,Czech Republic,Czech Republic,... \n", "20 \"USA,USA,USA\" \n", "21 \"China,China,China,China\" \n", "22 \"USA,USA,USA,USA\" \n", "23 \"Czech Republic,Czech Republic,Czech Republic,... 
\n", "24 \"Turkey,Turkey,Turkey,Turkey\" \n", "25 \"USA,USA,USA,USA,USA,USA\" \n", "26 \"Japan,Japan,Japan,Japan,Japan,Japan\" \n", "27 \"Czech Republic,Czech Republic,Czech Republic\" \n", "28 \"Poland,Poland,Poland,Poland\" \n", "29 \"Taiwan,Taiwan,Taiwan\" " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Blank out every DOI value that does not start with \"10.\" (placeholder strings used as nulls).\n", "# The result is assigned back instead of calling replace(inplace=True) on the column selection:\n", "# under pandas Copy-on-Write the in-place form only modifies a temporary and papers_df stays unchanged.\n", "# The dot is escaped so that only a literal \"10.\" prefix is accepted as a DOI.\n", "papers_df['DOI'] = papers_df['DOI'].replace(r'^(?!10\\.).*', '', regex=True)\n", "papers_df.head(30)" ] }, { "cell_type": "markdown", "id": "380da452", "metadata": {}, "source": [ "### Export" ] }, { "cell_type": "code", "execution_count": 19, "id": "26eafeb9", "metadata": {}, "outputs": [], "source": [ "papers_df.to_csv(\"paper_references.csv\", index=False, sep=\",\")" ] }, { "cell_type": "markdown", "id": "5812a021", "metadata": {}, "source": [ "## OpenAlex papers" ] }, { "cell_type": "markdown", "id": "5a7ba1cf", "metadata": {}, "source": [ "### Taking list of DOIs" ] }, { "cell_type": "code", "execution_count": null, "id": "eae3705c", "metadata": {}, "outputs": [], "source": [ "papers_df = pd.read_csv(\"data/paper_references_curated.csv\", sep=';')" ] }, { "cell_type": "markdown", "id": "6bb7a29e", "metadata": {}, "source": [ "Manual corrections in erroneous DOIs in `paper_references_curated.csv`\n", "- 10.22146/ijc.841 --> 10.22146/ijc.21248\n", "- paper con doi (incorrecto) 10.1007/s11244-017-0797 is repeated, the correct DOI is 10.1007/s11244-017-0797-x (appears in a previous row)\n", "- 10.6084/m9.figshare.12715484 --> 10.1038/s41467-020-18350-7\n", "- 10.1002/anie.x0xx00000x --> 10.1002/ange.202012019\n", "- 10.1007/s10562-020-03426 --> 10.1007/s10562-020-03426-2\n", "- 10.1002/slct.20200 --> 10.1002/slct.202004839\n", "\n", "Papers without DOI:\n", "- Kinetic study of photocatalytic reduction of CO2 over TiO2 --> https://openalex.org/W1629652031\n", "- A study on the photoreduction of green house CO2 gas catalized by TiO2 to form methane an methanol --> not in OpenAlex\n", "- Selective ethanol synthesis from 
carbon dioxide --> 10.1595/003214097x414166170\n", "- Selective photoreduction of CO2HCO3 - to formate by aqueous suspensions and colloids of Pd-TiO2 --> 10.1021/j100372a080\n", " \n" ] }, { "cell_type": "markdown", "id": "75d2ad5b", "metadata": {}, "source": [ "### Downloading individual JSON files per paper from the DOI" ] }, { "cell_type": "code", "execution_count": null, "id": "5d14f3f0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Error with nan, id 28\n", "Error with nan, id 145\n", "Error with 10.1007/s11244-017-0797, id 519\n" ] } ], "source": [ "headers = {'Accept': 'application/json'}\n", "query ='https://api.openalex.org/works/https://doi.org/{}'\n", "\n", "# One OpenAlex request per reference; every successful response is stored as\n", "# data/jsonOA/<No_de_Ref>.json, failures are reported on stdout.\n", "for index, row in papers_df.iterrows():\n", "    # A row without a DOI cannot be resolved: report it without querying \".../nan\"\n", "    if pd.isna(row['DOI']):\n", "        print(f\"Error with {row['DOI']}, id {str(row['No_de_Ref'])}\")\n", "        continue\n", "\n", "    try:\n", "        # The timeout keeps a single stalled connection from hanging the whole download\n", "        response = requests.get(query.format(row['DOI']), headers=headers, timeout=30)\n", "    except requests.RequestException as exc:\n", "        # Network failure for this paper only: report it and go on with the next row\n", "        print(f\"Error with {row['DOI']}, id {str(row['No_de_Ref'])}: {exc}\")\n", "        continue\n", "\n", "    if response.status_code == 200:\n", "        res_json = response.json()\n", "        # Keep the reference number from the CSV next to the OpenAlex record\n", "        res_json['solar_id'] = row['No_de_Ref']\n", "        with open('data/jsonOA/'+str(row['No_de_Ref'])+'.json', 'w') as file:\n", "            file.write(json.dumps(res_json, indent=4))\n", "    else:\n", "        print(f\"Error with {row['DOI']}, id {str(row['No_de_Ref'])}\")" ] }, { "cell_type": "markdown", "id": "0f14cd9c", "metadata": {}, "source": [ "### Merging individual JSONs into one single file" ] }, { "cell_type": "code", "execution_count": null, "id": "6405b1a7", "metadata": {}, "outputs": [], "source": [ "path = 'data/jsonOA/'\n", "# Keep only the per-paper JSON files, in a stable (sorted) order. Filtering by extension drops\n", "# '.DS_Store' as well, without the ValueError that list.remove() raises when that file is absent.\n", "json_file_names = sorted(name for name in os.listdir(path) if name.endswith('.json'))" ] }, { "cell_type": "code", "execution_count": null, "id": "ceaec6be", "metadata": {}, "outputs": [], "source": [ "# Load every per-paper record and write them out together as one JSON array\n", "merged_json = []\n", "for file in json_file_names:\n", "    filename = path + file\n", "    with open(filename, 'r') as infile:\n", "        merged_json.append(json.load(infile))\n", "\n", "with open('data/papersOA.json', 'w') as out_json:\n", "    json.dump(merged_json, out_json)" ] }, { "cell_type": "markdown", "id": "c632f6bb", 
"metadata": {}, "source": [ "## Processing `catalystdata.csv`" ] }, { "cell_type": "code", "execution_count": 4, "id": "3bed30eb", "metadata": {}, "outputs": [], "source": [ "exp_df = pd.read_csv(\"raw-catalystsdata.csv\")" ] }, { "cell_type": "markdown", "id": "d5f74afc", "metadata": {}, "source": [ "### Cleaning null values\n", "Negative numbers and values that serve as null" ] }, { "cell_type": "code", "execution_count": 5, "id": "ab549242", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDNo_de_RefCatalyst_nameTiO2_crystal_structureCatalystSupportsupport_percentCo_catalystco_catalyst_2co_catalyst_3...C4H10_mol_m2hC5H10_mol_m2hC5H12_mol_m2hCH3OH_mol_m2hC2H5OH_mol_m2hCH3COH_mol_m2hHCOOH_mol_m2hCH2O_mol_m2hC2H4O2_mol_m2htimestamp
011TiO2AnataseTiO2NaNNaNNaNNaN...0.010000-00-00 00:00:00
121TiO2AnataseTiO2NaNAgNaNNaN...0.010000-00-00 00:00:00
231Ag/TiO2AnataseTiO2NaNAgNaNNaN...0.010000-00-00 00:00:00
341Ag/TiO2AnataseTiO2NaNAgNaNNaN...0.020000-00-00 00:00:00
451Ag/TiO2AnataseTiO2NaNAgNaNNaN...0.030000-00-00 00:00:00
562TiO2NaNTiO2NaNNaNNaNNaN...0000-00-00 00:00:00
672TiO2NaNTiO2NaNNaNNaNNaN...0000-00-00 00:00:00
783Cu/TiO2AnataseTiO2NaNCuNaNNaN...0000-00-00 00:00:00
894TiO2AnataseTiO2NaNNaNNaNNaN...0000-00-00 00:00:00
9104Cu/TiO2AnataseTiO2NaNCuNaNNaN...0000-00-00 00:00:00
10114Cu/TiO2AnataseTiO2NaNCuNaNNaN...0000-00-00 00:00:00
11124Cu/TiO2AnataseTiO2NaNCuNaNNaN...0000-00-00 00:00:00
12135ZrO2NaNZrO2NaNNaNNaNNaN...0000-00-00 00:00:00
13145ZrO2NaNZrO2NaNNaNNaNNaN...0000-00-00 00:00:00
14155ZrO2NaNZrO2NaNNaNNaNNaN...0000-00-00 00:00:00
15165ZrO2NaNZrO2NaNNaNNaNNaN...0000-00-00 00:00:00
16175ZrO2NaNZrO2NaNNaNNaNNaN...0000-00-00 00:00:00
17185ZrO2NaNZrO2NaNNaNNaNNaN...0000-00-00 00:00:00
18195ZrO2NaNZrO2NaNNaNNaNNaN...0000-00-00 00:00:00
19206TiO2AnataseTiO2NaNNaNNaNNaN...0.220000-00-00 00:00:00
20217Pt/TiO2P25TiO2NaNPtNaNNaN...0000-00-00 00:00:00
21227Pt/TiO2P25TiO2NaNPtNaNNaN...0000-00-00 00:00:00
22238TiO2AnataseTiO2NaNCuONaNNaN...0000-00-00 00:00:00
23248CuO/TiO2AnataseTiO2NaNCuONaNNaN...0000-00-00 00:00:00
24259TiO2P25TiO2NaNNaNNaNNaN...0000-00-00 00:00:00
25269TiO2P25TiO2NaNNaNNaNNaN...0000-00-00 00:00:00
26279TiO2P25TiO2NaNNaNNaNNaN...0000-00-00 00:00:00
27289ZrO2NaNZrO2NaNNaNNaNNaN...0000-00-00 00:00:00
28299ZrO2NaNZrO2NaNNaNNaNNaN...0000-00-00 00:00:00
29309ZrO2NaNZrO2NaNNaNNaNNaN...0000-00-00 00:00:00
\n", "

30 rows × 95 columns

\n", "
" ], "text/plain": [ " ID No_de_Ref Catalyst_name TiO2_crystal_structure Catalyst Support \\\n", "0 1 1 TiO2 Anatase TiO2 NaN \n", "1 2 1 TiO2 Anatase TiO2 NaN \n", "2 3 1 Ag/TiO2 Anatase TiO2 NaN \n", "3 4 1 Ag/TiO2 Anatase TiO2 NaN \n", "4 5 1 Ag/TiO2 Anatase TiO2 NaN \n", "5 6 2 TiO2 NaN TiO2 NaN \n", "6 7 2 TiO2 NaN TiO2 NaN \n", "7 8 3 Cu/TiO2 Anatase TiO2 NaN \n", "8 9 4 TiO2 Anatase TiO2 NaN \n", "9 10 4 Cu/TiO2 Anatase TiO2 NaN \n", "10 11 4 Cu/TiO2 Anatase TiO2 NaN \n", "11 12 4 Cu/TiO2 Anatase TiO2 NaN \n", "12 13 5 ZrO2 NaN ZrO2 NaN \n", "13 14 5 ZrO2 NaN ZrO2 NaN \n", "14 15 5 ZrO2 NaN ZrO2 NaN \n", "15 16 5 ZrO2 NaN ZrO2 NaN \n", "16 17 5 ZrO2 NaN ZrO2 NaN \n", "17 18 5 ZrO2 NaN ZrO2 NaN \n", "18 19 5 ZrO2 NaN ZrO2 NaN \n", "19 20 6 TiO2 Anatase TiO2 NaN \n", "20 21 7 Pt/TiO2 P25 TiO2 NaN \n", "21 22 7 Pt/TiO2 P25 TiO2 NaN \n", "22 23 8 TiO2 Anatase TiO2 NaN \n", "23 24 8 CuO/TiO2 Anatase TiO2 NaN \n", "24 25 9 TiO2 P25 TiO2 NaN \n", "25 26 9 TiO2 P25 TiO2 NaN \n", "26 27 9 TiO2 P25 TiO2 NaN \n", "27 28 9 ZrO2 NaN ZrO2 NaN \n", "28 29 9 ZrO2 NaN ZrO2 NaN \n", "29 30 9 ZrO2 NaN ZrO2 NaN \n", "\n", " support_percent Co_catalyst co_catalyst_2 co_catalyst_3 ... C4H10_mol_m2h \\\n", "0 NaN NaN NaN ... \n", "1 Ag NaN NaN ... \n", "2 Ag NaN NaN ... \n", "3 Ag NaN NaN ... \n", "4 Ag NaN NaN ... \n", "5 NaN NaN NaN ... \n", "6 NaN NaN NaN ... \n", "7 Cu NaN NaN ... \n", "8 NaN NaN NaN ... \n", "9 Cu NaN NaN ... \n", "10 Cu NaN NaN ... \n", "11 Cu NaN NaN ... \n", "12 NaN NaN NaN ... \n", "13 NaN NaN NaN ... \n", "14 NaN NaN NaN ... \n", "15 NaN NaN NaN ... \n", "16 NaN NaN NaN ... \n", "17 NaN NaN NaN ... \n", "18 NaN NaN NaN ... \n", "19 NaN NaN NaN ... \n", "20 Pt NaN NaN ... \n", "21 Pt NaN NaN ... \n", "22 CuO NaN NaN ... \n", "23 CuO NaN NaN ... \n", "24 NaN NaN NaN ... \n", "25 NaN NaN NaN ... \n", "26 NaN NaN NaN ... \n", "27 NaN NaN NaN ... \n", "28 NaN NaN NaN ... \n", "29 NaN NaN NaN ... 
\n", "\n", " C5H10_mol_m2h C5H12_mol_m2h CH3OH_mol_m2h C2H5OH_mol_m2h CH3COH_mol_m2h \\\n", "0 0.01 \n", "1 0.01 \n", "2 0.01 \n", "3 0.02 \n", "4 0.03 \n", "5 \n", "6 \n", "7 \n", "8 \n", "9 \n", "10 \n", "11 \n", "12 \n", "13 \n", "14 \n", "15 \n", "16 \n", "17 \n", "18 \n", "19 \n", "20 \n", "21 \n", "22 \n", "23 \n", "24 \n", "25 \n", "26 \n", "27 \n", "28 \n", "29 \n", "\n", " HCOOH_mol_m2h CH2O_mol_m2h C2H4O2_mol_m2h timestamp \n", "0 0000-00-00 00:00:00 \n", "1 0000-00-00 00:00:00 \n", "2 0000-00-00 00:00:00 \n", "3 0000-00-00 00:00:00 \n", "4 0000-00-00 00:00:00 \n", "5 0000-00-00 00:00:00 \n", "6 0000-00-00 00:00:00 \n", "7 0000-00-00 00:00:00 \n", "8 0000-00-00 00:00:00 \n", "9 0000-00-00 00:00:00 \n", "10 0000-00-00 00:00:00 \n", "11 0000-00-00 00:00:00 \n", "12 0000-00-00 00:00:00 \n", "13 0000-00-00 00:00:00 \n", "14 0000-00-00 00:00:00 \n", "15 0000-00-00 00:00:00 \n", "16 0000-00-00 00:00:00 \n", "17 0000-00-00 00:00:00 \n", "18 0000-00-00 00:00:00 \n", "19 0.22 0000-00-00 00:00:00 \n", "20 0000-00-00 00:00:00 \n", "21 0000-00-00 00:00:00 \n", "22 0000-00-00 00:00:00 \n", "23 0000-00-00 00:00:00 \n", "24 0000-00-00 00:00:00 \n", "25 0000-00-00 00:00:00 \n", "26 0000-00-00 00:00:00 \n", "27 0000-00-00 00:00:00 \n", "28 0000-00-00 00:00:00 \n", "29 0000-00-00 00:00:00 \n", "\n", "[30 rows x 95 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exp_df.replace([-1,\"-1\", 0.0, 9999.99], '', inplace=True)\n", "exp_df.head(30)" ] }, { "cell_type": "markdown", "id": "91763b1f", "metadata": {}, "source": [ "### Cleaning values in field `Masscat_g`\n", "Some values are float (g), others have different measurements corresponding to different concepts or even comments. Filtering out all that do not correspond to grams." ] }, { "cell_type": "code", "execution_count": 9, "id": "f26093cd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0.1\n", "1 0.1\n", "2 0.1\n", "3 0.1\n", "4 0.1\n", " ... 
def clean_masscat(masscat):
    """Coerce one raw ``Masscat_g`` entry to a float, blanking anything else.

    Parameters
    ----------
    masscat : object
        Raw cell value: a number, a numeric string, free text, or a missing
        value such as ``None``.

    Returns
    -------
    float or str
        ``float(masscat)`` when the conversion succeeds, otherwise the empty
        string ``''`` (the blank marker this function has always returned for
        unparseable text).
    """
    try:
        return float(masscat)
    except (ValueError, TypeError):
        # ValueError: text that is not a number (e.g. a comment or other unit).
        # TypeError: objects float() cannot take at all, such as None.
        # The previous ``except ValueError or TypeError`` evaluated to
        # ``except ValueError`` and let TypeError escape.
        return ''
def set_wavelengths(wl):
    """Map one raw ``Wavelength_nm`` entry onto a wavelength-band label.

    Parameters
    ----------
    wl : object
        Raw cell value: a single wavelength (number or numeric string), an
        already-banded string such as ``'315-780'``, a band string carrying a
        parenthesised suffix, or a missing value.

    Returns
    -------
    str
        * For a numeric value inside 192-780: one of ``"192-280"``,
          ``"280-315"``, ``"315-400"``, ``"400-780"``. Bands are checked in
          ascending order, so a shared boundary (280, 315, 400) resolves to
          the lower band.
        * For a non-numeric string: the same string with the three known
          parenthesised suffixes stripped; other text passes through as is.
        * ``''`` for anything else: values ``float()`` rejects with a
          ``TypeError`` (e.g. ``None``), NaN, and numbers outside 192-780.
          Out-of-range numbers and NaN previously fell off the end of the
          function and yielded ``None``.
    """
    try:
        value = float(wl)
    except ValueError:
        # Not a number: normalise the labelled band strings and keep the rest.
        # NOTE(review): the suffixes are copied exactly as they occur in the
        # raw data; whether the UV-A / UV-C labels there are correct should be
        # confirmed against the data source.
        for labelled, plain in (
            ('192-280(UV-A)', '192-280'),
            ('315-400(UV-C)', '315-400'),
            ('280-315(UV-B)', '280-315'),
        ):
            wl = wl.replace(labelled, plain)
        return wl
    except TypeError:
        return ''

    # A single wavelength can only fall in one of the four elementary bands;
    # the wider composite bands ("315-780", "280-780", "192-780") could never
    # be reached for a scalar and only arrive as ready-made strings above.
    if 192 <= value <= 280:
        return "192-280"
    if 280 <= value <= 315:
        return "280-315"
    if 315 <= value <= 400:
        return "315-400"
    if 400 <= value <= 780:
        return "400-780"

    # NaN or out of range: blank, consistent with the other missing cases.
    return ''
{}, "outputs": [], "source": [ "exp_df.to_csv(\"catalystsdata.csv\", index=False, sep=\",\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 5 }