{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "ee929161-1b4b-47f1-8b13-6640ad6f48e4", "metadata": {}, "outputs": [], "source": [ "# Standard library: JSON parsing and HTTP access to the DataCite REST API\n", "import json\n", "import urllib.request\n", "\n", "# pandas flattens the nested JSON records into a DataFrame\n", "import pandas as pd\n", "# pandas.io.json.json_normalize is the deprecated import path (pandas 1.0) and is\n", "# not importable from there in pandas 2.x; the top-level name is the supported one\n", "from pandas import json_normalize\n", "\n", "# Matplotlib for data visualisation\n", "import matplotlib.pyplot as plt\n", "\n", "# NumPy for data handling\n", "import numpy as np" ] }, { "cell_type": "markdown", "id": "bb5b0d72-50ab-47fd-8209-99356d96af44", "metadata": { "tags": [] }, "source": [ "# Get Metadata from DataCite REST API" ] }, { "cell_type": "code", "execution_count": 2, "id": "4fc9af36-be55-46a4-b6e9-2a7584e06923", "metadata": {}, "outputs": [], "source": [ "# Query the DataCite REST API. Change the query string to run your own search,\n", "# e.g. query=Max%20Planck&page[size]=1000\n", "# NOTE(review): DataCite documents 1000 as the maximum page size - confirm how\n", "# the API treats the page[size]=5000 requested here.\n", "with urllib.request.urlopen('https://api.datacite.org/dois?query=Ilia%20State%20University&page[size]=5000') as url:\n", "    # Parse the JSON body of the HTTP response into a Python object\n", "    DataCite = json.load(url)\n", "\n", "# Keep a copy of the raw API response on disk\n", "with open('DataCite_Raw_Metadata.json', 'w', encoding='utf-8') as f:\n", "    json.dump(DataCite, f)\n", "\n", "# Read the saved response back; the context manager guarantees that the\n", "# file handle is closed once the JSON has been parsed\n", "with open('DataCite_Raw_Metadata.json', encoding='utf-8') as f:\n", "    data = json.load(f)\n", "\n", "# Flatten the list stored under the top-level 'data' key into one row per record\n", "data_normalized = pd.json_normalize(data, 'data')\n" ] }, { "cell_type": "markdown", "id": "9ada51f1-1459-4ce6-abe6-df9f75702fe0", "metadata": {}, "source": [ "## Create Pandas Dataframe as Preview" ] }, { "cell_type": "code", "execution_count": 3, "id": "713d9258-494f-416c-aaed-9f63cd9624f7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | id | \n", "type | \n", "attributes.doi | \n", "attributes.identifiers | \n", "attributes.creators | \n", "attributes.titles | \n", "attributes.publisher | \n", "attributes.publicationYear | \n", "attributes.subjects | \n", "attributes.contributors | \n", "... | \n", "attributes.versionOfCount | \n", "attributes.created | \n", "attributes.registered | \n", "attributes.published | \n", "attributes.updated | \n", "relationships.client.data.id | \n", "relationships.client.data.type | \n", "attributes.container.type | \n", "attributes.container.identifier | \n", "attributes.container.identifierType | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "10.5281/zenodo.7223794 | \n", "dois | \n", "10.5281/zenodo.7223794 | \n", "[{'identifier': 'https://zenodo.org/record/722... | \n", "[{'name': 'Grossmann, Yves Vincent', 'givenNam... | \n", "[{'title': 'Research Data Policies – A Short O... | \n", "Zenodo | \n", "2022 | \n", "[{'subject': 'ISUBII22'}, {'subject': 'Ilia St... | \n", "[] | \n", "... | \n", "0 | \n", "2022-10-26T12:15:33Z | \n", "2022-10-26T12:15:34Z | \n", "None | \n", "2022-10-26T12:15:34Z | \n", "cern.zenodo | \n", "clients | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 1 | \n", "10.5281/zenodo.7223795 | \n", "dois | \n", "10.5281/zenodo.7223795 | \n", "[] | \n", "[{'name': 'Grossmann, Yves Vincent', 'givenNam... | \n", "[{'title': 'Research Data Policies – A Short O... | \n", "Zenodo | \n", "2022 | \n", "[{'subject': 'ISUBII22'}, {'subject': 'Ilia St... | \n", "[] | \n", "... | \n", "0 | \n", "2022-10-26T12:15:32Z | \n", "2022-10-26T12:15:33Z | \n", "None | \n", "2022-10-26T12:15:33Z | \n", "cern.zenodo | \n", "clients | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 2 | \n", "10.5281/zenodo.7243699 | \n", "dois | \n", "10.5281/zenodo.7243699 | \n", "[{'identifier': 'https://zenodo.org/record/724... | \n", "[{'name': 'Grossmann, Yves Vincent', 'givenNam... | \n", "[{'title': 'DataCite ROR Parser'}] | \n", "Zenodo | \n", "2022 | \n", "[{'subject': 'ISUBII22'}, {'subject': 'Python'... | \n", "[] | \n", "... | \n", "0 | \n", "2022-10-24T08:08:18Z | \n", "2022-10-24T08:08:19Z | \n", "None | \n", "2022-10-26T06:40:27Z | \n", "cern.zenodo | \n", "clients | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 3 | \n", "10.5281/zenodo.7243700 | \n", "dois | \n", "10.5281/zenodo.7243700 | \n", "[] | \n", "[{'name': 'Grossmann, Yves Vincent', 'givenNam... | \n", "[{'title': 'DataCite ROR Parser'}] | \n", "Zenodo | \n", "2022 | \n", "[{'subject': 'ISUBII22'}, {'subject': 'Python'... | \n", "[] | \n", "... | \n", "0 | \n", "2022-10-24T08:08:17Z | \n", "2022-10-24T08:08:18Z | \n", "None | \n", "2022-10-26T06:40:26Z | \n", "cern.zenodo | \n", "clients | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4 | \n", "10.5281/zenodo.7225465 | \n", "dois | \n", "10.5281/zenodo.7225465 | \n", "[{'identifier': 'https://zenodo.org/record/722... | \n", "[{'name': 'Grossmann, Yves Vincent', 'givenNam... | \n", "[{'title': 'Research Data Management – A Short... | \n", "Zenodo | \n", "2022 | \n", "[{'subject': 'ISUBII22'}, {'subject': 'Ilia St... | \n", "[] | \n", "... | \n", "0 | \n", "2022-10-25T07:15:22Z | \n", "2022-10-25T07:15:22Z | \n", "None | \n", "2022-10-25T07:15:22Z | \n", "cern.zenodo | \n", "clients | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 97 | \n", "10.13140/rg.2.1.3170.7766 | \n", "dois | \n", "10.13140/rg.2.1.3170.7766 | \n", "[{'identifier': 'https://doi.org/10.13140/rg.2... | \n", "[{'name': 'Tumanishvili, George G.', 'nameType... | \n", "[{'title': 'ხელშეკრულების შედგენის ტექნიკა და ... | \n", "ILIA STATE UNIVERSITY PRESS | \n", "2012 | \n", "[] | \n", "[] | \n", "... | \n", "0 | \n", "2016-06-26T21:47:27Z | \n", "2016-06-26T21:47:28Z | \n", "None | \n", "2020-06-29T13:28:40Z | \n", "rg.rg | \n", "clients | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 98 | \n", "10.13140/rg.2.1.1126.8720 | \n", "dois | \n", "10.13140/rg.2.1.1126.8720 | \n", "[{'identifier': 'https://doi.org/10.13140/rg.2... | \n", "[{'name': 'Tumanishvili, George G.', 'nameType... | \n", "[{'title': 'სამართლის პროფესიები'}] | \n", "ILIA STATE UNIVERSITY PRESS | \n", "2009 | \n", "[] | \n", "[] | \n", "... | \n", "0 | \n", "2016-06-26T17:15:55Z | \n", "2016-06-26T17:15:56Z | \n", "None | \n", "2020-06-29T13:28:40Z | \n", "rg.rg | \n", "clients | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 99 | \n", "10.13140/rg.2.1.3944.9206 | \n", "dois | \n", "10.13140/rg.2.1.3944.9206 | \n", "[{'identifier': 'https://doi.org/10.13140/rg.2... | \n", "[{'name': 'Godoladze, Karlo', 'nameType': 'Per... | \n", "[{'title': 'საკონსტიტუციო ცვლილებები საქართველ... | \n", "ილიას სახელმწიფო უნივერსიტეტის გამომცემლობა, I... | \n", "2013 | \n", "[] | \n", "[] | \n", "... | \n", "0 | \n", "2016-06-26T16:37:37Z | \n", "2016-06-26T16:37:38Z | \n", "None | \n", "2020-06-29T13:28:40Z | \n", "rg.rg | \n", "clients | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 100 | \n", "10.13140/rg.2.1.5071.3209 | \n", "dois | \n", "10.13140/rg.2.1.5071.3209 | \n", "[{'identifier': 'https://doi.org/10.13140/rg.2... | \n", "[{'name': 'Tumanishvili, George G.', 'nameType... | \n", "[{'title': 'საკუთრება, როგორც მოთხოვნის უზრუნვ... | \n", "ILIA STATE UNIVERSITY PRESS | \n", "2012 | \n", "[] | \n", "[] | \n", "... | \n", "0 | \n", "2016-06-26T21:41:26Z | \n", "2016-06-26T21:41:27Z | \n", "None | \n", "2020-06-29T13:28:40Z | \n", "rg.rg | \n", "clients | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 101 | \n", "10.15468/dl.77nmev | \n", "dois | \n", "10.15468/dl.77nmev | \n", "[] | \n", "[{'name': 'Occdownload Gbif.Org', 'nameType': ... | \n", "[{'title': 'Occurrence Download'}] | \n", "The Global Biodiversity Information Facility | \n", "2020 | \n", "[{'lang': 'eng', 'subject': 'GBIF'}, {'lang': ... | \n", "[] | \n", "... | \n", "0 | \n", "2020-06-11T20:24:17Z | \n", "2020-06-11T20:24:17Z | \n", "None | \n", "2020-06-11T20:24:18Z | \n", "gbif.gbif | \n", "clients | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
102 rows × 51 columns
\n", "