{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# List Objects in Production\n", "\n", "**Task:** Connect to the DataONE production environment, retrieve the list of Objects, and print out the list." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, load the `dataone` R package and connect to the production Coordinating Node:\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "library(dataone)\n", "cn <- CNode()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`listObjects` accepts a number of optional arguments (see `?listObjects`) such as `count` which controls the number of objects that are returned:" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<dl>\n", "\t<dt>$objectInfo</dt>\n", "\t\t<dd><dl>\n", "\t<dt>$identifier</dt>\n", "\t\t<dd>'0000120ce277dbb2e140d74b50ca23e5'</dd>\n", "\t<dt>$formatId</dt>\n", "\t\t<dd>'http://www.isotc211.org/2005/gmd-pangaea'</dd>\n", "\t<dt>$checksum</dt>\n", "\t\t<dd><dl>\n", "\t<dt>$text</dt>\n", "\t\t<dd>'0000120ce277dbb2e140d74b50ca23e5'</dd>\n", "\t<dt>$.attrs</dt>\n", "\t\t<dd><strong>algorithm:</strong> 'MD5'</dd>\n", "</dl>\n", "</dd>\n", "\t<dt>$dateSysMetadataModified</dt>\n", "\t\t<dd>'2018-04-20T07:59:45.497+00:00'</dd>\n", "\t<dt>$size</dt>\n", "\t\t<dd>'19541'</dd>\n", "</dl>\n", "</dd>\n", "\t<dt>$objectInfo</dt>\n", "\t\t<dd><dl>\n", "\t<dt>$identifier</dt>\n", "\t\t<dd>'000026213216f47287f0d3027f3c4be3'</dd>\n", "\t<dt>$formatId</dt>\n", "\t\t<dd>'http://www.isotc211.org/2005/gmd-pangaea'</dd>\n", "\t<dt>$checksum</dt>\n", "\t\t<dd><dl>\n", "\t<dt>$text</dt>\n", "\t\t<dd>'000026213216f47287f0d3027f3c4be3'</dd>\n", "\t<dt>$.attrs</dt>\n", "\t\t<dd><strong>algorithm:</strong> 'MD5'</dd>\n", "</dl>\n", "</dd>\n", "\t<dt>$dateSysMetadataModified</dt>\n", "\t\t<dd>'2018-04-20T05:09:36.311+00:00'</dd>\n", "\t<dt>$size</dt>\n", "\t\t<dd>'26256'</dd>\n", "</dl>\n", 
"</dd>\n", "\t<dt>$objectInfo</dt>\n", "\t\t<dd><dl>\n", "\t<dt>$identifier</dt>\n", "\t\t<dd>'0000aa6924377b6a7e5ab59bcec5d4f3'</dd>\n", "\t<dt>$formatId</dt>\n", "\t\t<dd>'http://www.isotc211.org/2005/gmd-pangaea'</dd>\n", "\t<dt>$checksum</dt>\n", "\t\t<dd><dl>\n", "\t<dt>$text</dt>\n", "\t\t<dd>'0000aa6924377b6a7e5ab59bcec5d4f3'</dd>\n", "\t<dt>$.attrs</dt>\n", "\t\t<dd><strong>algorithm:</strong> 'MD5'</dd>\n", "</dl>\n", "</dd>\n", "\t<dt>$dateSysMetadataModified</dt>\n", "\t\t<dd>'2018-02-17T03:01:16.406+00:00'</dd>\n", "\t<dt>$size</dt>\n", "\t\t<dd>'35084'</dd>\n", "</dl>\n", "</dd>\n", "\t<dt>$objectInfo</dt>\n", "\t\t<dd><dl>\n", "\t<dt>$identifier</dt>\n", "\t\t<dd>'0000d11ff42b22915fcce5cfa6027040'</dd>\n", "\t<dt>$formatId</dt>\n", "\t\t<dd>'http://www.isotc211.org/2005/gmd-pangaea'</dd>\n", "\t<dt>$checksum</dt>\n", "\t\t<dd><dl>\n", "\t<dt>$text</dt>\n", "\t\t<dd>'0000d11ff42b22915fcce5cfa6027040'</dd>\n", "\t<dt>$.attrs</dt>\n", "\t\t<dd><strong>algorithm:</strong> 'MD5'</dd>\n", "</dl>\n", "</dd>\n", "\t<dt>$dateSysMetadataModified</dt>\n", "\t\t<dd>'2018-01-06T10:43:32.073+00:00'</dd>\n", "\t<dt>$size</dt>\n", "\t\t<dd>'35257'</dd>\n", "</dl>\n", "</dd>\n", "\t<dt>$objectInfo</dt>\n", "\t\t<dd><dl>\n", "\t<dt>$identifier</dt>\n", "\t\t<dd>'0000eb4ff1fc59ae6c33a4981e00eabf'</dd>\n", "\t<dt>$formatId</dt>\n", "\t\t<dd>'http://www.isotc211.org/2005/gmd-pangaea'</dd>\n", "\t<dt>$checksum</dt>\n", "\t\t<dd><dl>\n", "\t<dt>$text</dt>\n", "\t\t<dd>'0000eb4ff1fc59ae6c33a4981e00eabf'</dd>\n", "\t<dt>$.attrs</dt>\n", "\t\t<dd><strong>algorithm:</strong> 'MD5'</dd>\n", "</dl>\n", "</dd>\n", "\t<dt>$dateSysMetadataModified</dt>\n", "\t\t<dd>'2018-01-08T11:18:27.291+00:00'</dd>\n", "\t<dt>$size</dt>\n", "\t\t<dd>'49904'</dd>\n", "</dl>\n", "</dd>\n", "\t<dt>$.attrs</dt>\n", "\t\t<dd><style>\n", ".dl-inline {width: auto; margin:0; padding: 0}\n", ".dl-inline>dt, .dl-inline>dd {float: none; width: auto; display: inline-block}\n", ".dl-inline>dt::after {content: 
\":\\0020\"; padding-right: .5ex}\n", ".dl-inline>dt:not(:first-of-type) {padding-left: .5ex}\n", "</style><dl class=dl-inline><dt>count</dt><dd>'5'</dd><dt>start</dt><dd>'0'</dd><dt>total</dt><dd>'2897219'</dd></dl>\n", "</dd>\n", "</dl>\n" ], "text/latex": [ "\\begin{description}\n", "\\item[\\$objectInfo] \\begin{description}\n", "\\item[\\$identifier] '0000120ce277dbb2e140d74b50ca23e5'\n", "\\item[\\$formatId] 'http://www.isotc211.org/2005/gmd-pangaea'\n", "\\item[\\$checksum] \\begin{description}\n", "\\item[\\$text] '0000120ce277dbb2e140d74b50ca23e5'\n", "\\item[\\$.attrs] \\textbf{algorithm:} 'MD5'\n", "\\end{description}\n", "\n", "\\item[\\$dateSysMetadataModified] '2018-04-20T07:59:45.497+00:00'\n", "\\item[\\$size] '19541'\n", "\\end{description}\n", "\n", "\\item[\\$objectInfo] \\begin{description}\n", "\\item[\\$identifier] '000026213216f47287f0d3027f3c4be3'\n", "\\item[\\$formatId] 'http://www.isotc211.org/2005/gmd-pangaea'\n", "\\item[\\$checksum] \\begin{description}\n", "\\item[\\$text] '000026213216f47287f0d3027f3c4be3'\n", "\\item[\\$.attrs] \\textbf{algorithm:} 'MD5'\n", "\\end{description}\n", "\n", "\\item[\\$dateSysMetadataModified] '2018-04-20T05:09:36.311+00:00'\n", "\\item[\\$size] '26256'\n", "\\end{description}\n", "\n", "\\item[\\$objectInfo] \\begin{description}\n", "\\item[\\$identifier] '0000aa6924377b6a7e5ab59bcec5d4f3'\n", "\\item[\\$formatId] 'http://www.isotc211.org/2005/gmd-pangaea'\n", "\\item[\\$checksum] \\begin{description}\n", "\\item[\\$text] '0000aa6924377b6a7e5ab59bcec5d4f3'\n", "\\item[\\$.attrs] \\textbf{algorithm:} 'MD5'\n", "\\end{description}\n", "\n", "\\item[\\$dateSysMetadataModified] '2018-02-17T03:01:16.406+00:00'\n", "\\item[\\$size] '35084'\n", "\\end{description}\n", "\n", "\\item[\\$objectInfo] \\begin{description}\n", "\\item[\\$identifier] '0000d11ff42b22915fcce5cfa6027040'\n", "\\item[\\$formatId] 'http://www.isotc211.org/2005/gmd-pangaea'\n", "\\item[\\$checksum] \\begin{description}\n", 
"\\item[\\$text] '0000d11ff42b22915fcce5cfa6027040'\n", "\\item[\\$.attrs] \\textbf{algorithm:} 'MD5'\n", "\\end{description}\n", "\n", "\\item[\\$dateSysMetadataModified] '2018-01-06T10:43:32.073+00:00'\n", "\\item[\\$size] '35257'\n", "\\end{description}\n", "\n", "\\item[\\$objectInfo] \\begin{description}\n", "\\item[\\$identifier] '0000eb4ff1fc59ae6c33a4981e00eabf'\n", "\\item[\\$formatId] 'http://www.isotc211.org/2005/gmd-pangaea'\n", "\\item[\\$checksum] \\begin{description}\n", "\\item[\\$text] '0000eb4ff1fc59ae6c33a4981e00eabf'\n", "\\item[\\$.attrs] \\textbf{algorithm:} 'MD5'\n", "\\end{description}\n", "\n", "\\item[\\$dateSysMetadataModified] '2018-01-08T11:18:27.291+00:00'\n", "\\item[\\$size] '49904'\n", "\\end{description}\n", "\n", "\\item[\\$.attrs] \\begin{description*}\n", "\\item[count] '5'\n", "\\item[start] '0'\n", "\\item[total] '2897219'\n", "\\end{description*}\n", "\n", "\\end{description}\n" ], "text/markdown": [ "$objectInfo\n", ": $identifier\n", ": '0000120ce277dbb2e140d74b50ca23e5'\n", "$formatId\n", ": 'http://www.isotc211.org/2005/gmd-pangaea'\n", "$checksum\n", ": $text\n", ": '0000120ce277dbb2e140d74b50ca23e5'\n", "$.attrs\n", ": **algorithm:** 'MD5'\n", "\n", "\n", "\n", "$dateSysMetadataModified\n", ": '2018-04-20T07:59:45.497+00:00'\n", "$size\n", ": '19541'\n", "\n", "\n", "\n", "$objectInfo\n", ": $identifier\n", ": '000026213216f47287f0d3027f3c4be3'\n", "$formatId\n", ": 'http://www.isotc211.org/2005/gmd-pangaea'\n", "$checksum\n", ": $text\n", ": '000026213216f47287f0d3027f3c4be3'\n", "$.attrs\n", ": **algorithm:** 'MD5'\n", "\n", "\n", "\n", "$dateSysMetadataModified\n", ": '2018-04-20T05:09:36.311+00:00'\n", "$size\n", ": '26256'\n", "\n", "\n", "\n", "$objectInfo\n", ": $identifier\n", ": '0000aa6924377b6a7e5ab59bcec5d4f3'\n", "$formatId\n", ": 'http://www.isotc211.org/2005/gmd-pangaea'\n", "$checksum\n", ": $text\n", ": '0000aa6924377b6a7e5ab59bcec5d4f3'\n", "$.attrs\n", ": **algorithm:** 'MD5'\n", "\n", "\n", "\n", 
"$dateSysMetadataModified\n", ": '2018-02-17T03:01:16.406+00:00'\n", "$size\n", ": '35084'\n", "\n", "\n", "\n", "$objectInfo\n", ": $identifier\n", ": '0000d11ff42b22915fcce5cfa6027040'\n", "$formatId\n", ": 'http://www.isotc211.org/2005/gmd-pangaea'\n", "$checksum\n", ": $text\n", ": '0000d11ff42b22915fcce5cfa6027040'\n", "$.attrs\n", ": **algorithm:** 'MD5'\n", "\n", "\n", "\n", "$dateSysMetadataModified\n", ": '2018-01-06T10:43:32.073+00:00'\n", "$size\n", ": '35257'\n", "\n", "\n", "\n", "$objectInfo\n", ": $identifier\n", ": '0000eb4ff1fc59ae6c33a4981e00eabf'\n", "$formatId\n", ": 'http://www.isotc211.org/2005/gmd-pangaea'\n", "$checksum\n", ": $text\n", ": '0000eb4ff1fc59ae6c33a4981e00eabf'\n", "$.attrs\n", ": **algorithm:** 'MD5'\n", "\n", "\n", "\n", "$dateSysMetadataModified\n", ": '2018-01-08T11:18:27.291+00:00'\n", "$size\n", ": '49904'\n", "\n", "\n", "\n", "$.attrs\n", ": count\n", ": '5'start\n", ": '0'total\n", ": '2897219'\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "$objectInfo\n", "$objectInfo$identifier\n", "[1] \"0000120ce277dbb2e140d74b50ca23e5\"\n", "\n", "$objectInfo$formatId\n", "[1] \"http://www.isotc211.org/2005/gmd-pangaea\"\n", "\n", "$objectInfo$checksum\n", "$objectInfo$checksum$text\n", "[1] \"0000120ce277dbb2e140d74b50ca23e5\"\n", "\n", "$objectInfo$checksum$.attrs\n", "algorithm \n", " \"MD5\" \n", "\n", "\n", "$objectInfo$dateSysMetadataModified\n", "[1] \"2018-04-20T07:59:45.497+00:00\"\n", "\n", "$objectInfo$size\n", "[1] \"19541\"\n", "\n", "\n", "$objectInfo\n", "$objectInfo$identifier\n", "[1] \"000026213216f47287f0d3027f3c4be3\"\n", "\n", "$objectInfo$formatId\n", "[1] \"http://www.isotc211.org/2005/gmd-pangaea\"\n", "\n", "$objectInfo$checksum\n", "$objectInfo$checksum$text\n", "[1] \"000026213216f47287f0d3027f3c4be3\"\n", "\n", "$objectInfo$checksum$.attrs\n", "algorithm \n", " \"MD5\" \n", "\n", "\n", "$objectInfo$dateSysMetadataModified\n", "[1] \"2018-04-20T05:09:36.311+00:00\"\n", "\n", "$objectInfo$size\n", "[1] 
\"26256\"\n", "\n", "\n", "$objectInfo\n", "$objectInfo$identifier\n", "[1] \"0000aa6924377b6a7e5ab59bcec5d4f3\"\n", "\n", "$objectInfo$formatId\n", "[1] \"http://www.isotc211.org/2005/gmd-pangaea\"\n", "\n", "$objectInfo$checksum\n", "$objectInfo$checksum$text\n", "[1] \"0000aa6924377b6a7e5ab59bcec5d4f3\"\n", "\n", "$objectInfo$checksum$.attrs\n", "algorithm \n", " \"MD5\" \n", "\n", "\n", "$objectInfo$dateSysMetadataModified\n", "[1] \"2018-02-17T03:01:16.406+00:00\"\n", "\n", "$objectInfo$size\n", "[1] \"35084\"\n", "\n", "\n", "$objectInfo\n", "$objectInfo$identifier\n", "[1] \"0000d11ff42b22915fcce5cfa6027040\"\n", "\n", "$objectInfo$formatId\n", "[1] \"http://www.isotc211.org/2005/gmd-pangaea\"\n", "\n", "$objectInfo$checksum\n", "$objectInfo$checksum$text\n", "[1] \"0000d11ff42b22915fcce5cfa6027040\"\n", "\n", "$objectInfo$checksum$.attrs\n", "algorithm \n", " \"MD5\" \n", "\n", "\n", "$objectInfo$dateSysMetadataModified\n", "[1] \"2018-01-06T10:43:32.073+00:00\"\n", "\n", "$objectInfo$size\n", "[1] \"35257\"\n", "\n", "\n", "$objectInfo\n", "$objectInfo$identifier\n", "[1] \"0000eb4ff1fc59ae6c33a4981e00eabf\"\n", "\n", "$objectInfo$formatId\n", "[1] \"http://www.isotc211.org/2005/gmd-pangaea\"\n", "\n", "$objectInfo$checksum\n", "$objectInfo$checksum$text\n", "[1] \"0000eb4ff1fc59ae6c33a4981e00eabf\"\n", "\n", "$objectInfo$checksum$.attrs\n", "algorithm \n", " \"MD5\" \n", "\n", "\n", "$objectInfo$dateSysMetadataModified\n", "[1] \"2018-01-08T11:18:27.291+00:00\"\n", "\n", "$objectInfo$size\n", "[1] \"49904\"\n", "\n", "\n", "$.attrs\n", " count start total \n", " \"5\" \"0\" \"2897219\" \n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "listObjects(cn, count = 5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As another example, we can filter to _just_ CSV objects whose system metadata was modified within a time frame (`fromDate` and `toDate` filter on `dateSysMetadataModified`, not on the date an object was created):" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "5" ], "text/latex": 
[ "5" ], "text/markdown": [ "5" ], "text/plain": [ "[1] 5" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "csvs <- listObjects(cn, \n", " fromDate = \"2020-01-01T00:00:00.000Z\", \n", " toDate = \"2021-01-01T00:00:00.000Z\", \n", " formatId = \"text/csv\", \n", " count = 5)\n", "length(csvs$objectInfo)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can see we received the five results we requested, but the `total` attribute of the response shows that many more are available:\n" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<strong>total:</strong> '28191'" ], "text/latex": [ "\\textbf{total:} '28191'" ], "text/markdown": [ "**total:** '28191'" ], "text/plain": [ " total \n", "\"28191\" " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "csvs$.attrs[3]\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Knowing this, we can use the `start` argument to page through the results (`start` is a zero-based offset, so `start = 5` skips the first five objects and returns the next page):\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "page_two <- listObjects(cn, \n", " fromDate = \"2020-01-01T00:00:00.000Z\", \n", " toDate = \"2021-01-01T00:00:00.000Z\", \n", " formatId = \"text/csv\", \n", " count = 5,\n", " start = 5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "page_two" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.0.2" } }, "nbformat": 4, "nbformat_minor": 4 }