{ "cells": [ { "cell_type": "markdown", "id": "61a040ce-418a-4a97-9850-0813cfb02422", "metadata": {}, "source": [ "## Accessing GBIF data with the Planetary Computer STAC API\n", "\n", "This notebook provides an example of accessing [Global Biodiversity Information Facility](https://planetarycomputer.microsoft.com/dataset/gbif) (GBIF) occurrence data from the Planetary Computer STAC API. Periodic snapshots of the data are stored in Parquet format." ] }, { "cell_type": "code", "execution_count": 1, "id": "377c1939-0449-4a4a-b2e1-5927a438e282", "metadata": {}, "outputs": [], "source": [ "import pystac_client\n", "import planetary_computer" ] }, { "cell_type": "markdown", "id": "8843db71-467d-4537-837c-844b30f0cf68", "metadata": {}, "source": [ "To access the data stored in Azure Blob Storage, we'll use the Planetary Computer's [STAC API](https://planetarycomputer.microsoft.com/api/stac/v1/docs). " ] }, { "cell_type": "code", "execution_count": 2, "id": "f456e340-2e97-4534-9548-28e94b03806b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['gbif-2022-10-01',\n", " 'gbif-2022-09-01',\n", " 'gbif-2022-08-01',\n", " 'gbif-2022-07-01',\n", " 'gbif-2022-06-01',\n", " 'gbif-2022-05-01',\n", " 'gbif-2022-04-01',\n", " 'gbif-2022-03-01',\n", " 'gbif-2022-02-01',\n", " 'gbif-2022-01-01',\n", " 'gbif-2021-12-01',\n", " 'gbif-2021-11-01',\n", " 'gbif-2021-10-01',\n", " 'gbif-2021-09-01',\n", " 'gbif-2021-08-01',\n", " 'gbif-2021-07-01',\n", " 'gbif-2021-06-01',\n", " 'gbif-2021-04-13']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "catalog = pystac_client.Client.open(\n", " \"https://planetarycomputer.microsoft.com/api/stac/v1\",\n", " modifier=planetary_computer.sign_inplace,\n", ")\n", "search = catalog.search(collections=[\"gbif\"])\n", "items = search.get_all_items()\n", "items = {x.id: x for x in items}\n", "list(items)" ] }, { "cell_type": "markdown", "id": "15a7e8ac-5128-49f7-8781-96c578044b99", "metadata": {}, 
"source": [ "We'll take the most recent item." ] }, { "cell_type": "code", "execution_count": 4, "id": "1a786e42-a134-48a9-b8d4-1046b8c78556", "metadata": {}, "outputs": [], "source": [ "item = list(items.values())[0]" ] }, { "cell_type": "markdown", "id": "435ec017-8d3c-49a1-8c03-748ec2ba2613", "metadata": {}, "source": [ "We'll use [Dask](https://docs.dask.org/en/latest/) to read the partitioned Parquet Dataset." ] }, { "cell_type": "code", "execution_count": 10, "id": "aeee8190-2183-4600-8820-c3d323270488", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | gbifid | \n", "datasetkey | \n", "occurrenceid | \n", "kingdom | \n", "phylum | \n", "class | \n", "order | \n", "family | \n", "genus | \n", "species | \n", "infraspecificepithet | \n", "taxonrank | \n", "scientificname | \n", "verbatimscientificname | \n", "verbatimscientificnameauthorship | \n", "countrycode | \n", "locality | \n", "stateprovince | \n", "occurrencestatus | \n", "individualcount | \n", "publishingorgkey | \n", "decimallatitude | \n", "decimallongitude | \n", "coordinateuncertaintyinmeters | \n", "coordinateprecision | \n", "elevation | \n", "elevationaccuracy | \n", "depth | \n", "depthaccuracy | \n", "eventdate | \n", "day | \n", "month | \n", "year | \n", "taxonkey | \n", "specieskey | \n", "basisofrecord | \n", "institutioncode | \n", "collectioncode | \n", "catalognumber | \n", "recordnumber | \n", "identifiedby | \n", "dateidentified | \n", "license | \n", "rightsholder | \n", "recordedby | \n", "typestatus | \n", "establishmentmeans | \n", "lastinterpreted | \n", "mediatype | \n", "issue | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
npartitions=1960 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
\n", " | int64 | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "int32 | \n", "object | \n", "float64 | \n", "float64 | \n", "float64 | \n", "float64 | \n", "float64 | \n", "float64 | \n", "float64 | \n", "float64 | \n", "datetime64[ns] | \n", "int32 | \n", "int32 | \n", "int32 | \n", "int32 | \n", "int32 | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "datetime64[ns] | \n", "object | \n", "object | \n", "object | \n", "object | \n", "object | \n", "datetime64[ns] | \n", "object | \n", "object | \n", "
\n", " | ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
\n", " | ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
\n", " | ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
\n", " | gbifid | \n", "datasetkey | \n", "occurrenceid | \n", "kingdom | \n", "phylum | \n", "class | \n", "order | \n", "family | \n", "genus | \n", "species | \n", "... | \n", "identifiedby | \n", "dateidentified | \n", "license | \n", "rightsholder | \n", "recordedby | \n", "typestatus | \n", "establishmentmeans | \n", "lastinterpreted | \n", "mediatype | \n", "issue | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2141788029 | \n", "4fa7b334-ce0d-4e88-aaae-2e0c138d049e | \n", "URN:catalog:CLO:EBIRD:OBS590769735 | \n", "Animalia | \n", "Chordata | \n", "Aves | \n", "Passeriformes | \n", "Polioptilidae | \n", "Polioptila | \n", "Polioptila caerulea | \n", "... | \n", "[] | \n", "None | \n", "CC_BY_4_0 | \n", "None | \n", "[{'array_element': 'obsr233099'}] | \n", "[] | \n", "None | \n", "2022-09-08 14:29:55.344000 | \n", "[] | \n", "[] | \n", "
1 | \n", "2142511629 | \n", "4fa7b334-ce0d-4e88-aaae-2e0c138d049e | \n", "URN:catalog:CLO:EBIRD:OBS591457037 | \n", "Animalia | \n", "Chordata | \n", "Aves | \n", "Columbiformes | \n", "Columbidae | \n", "Patagioenas | \n", "Patagioenas cayennensis | \n", "... | \n", "[] | \n", "None | \n", "CC_BY_4_0 | \n", "None | \n", "[{'array_element': 'obsr767103'}] | \n", "[] | \n", "None | \n", "2022-09-08 14:29:56.715000 | \n", "[] | \n", "[{'array_element': 'COORDINATE_ROUNDED'}] | \n", "
2 | \n", "2126155116 | \n", "4fa7b334-ce0d-4e88-aaae-2e0c138d049e | \n", "URN:catalog:CLO:EBIRD:OBS592106103 | \n", "Animalia | \n", "Chordata | \n", "Aves | \n", "Accipitriformes | \n", "Pandionidae | \n", "Pandion | \n", "Pandion haliaetus | \n", "... | \n", "[] | \n", "None | \n", "CC_BY_4_0 | \n", "None | \n", "[{'array_element': 'obsr370369'}] | \n", "[] | \n", "None | \n", "2022-09-08 14:29:58.118000 | \n", "[] | \n", "[] | \n", "
3 | \n", "2143587663 | \n", "4fa7b334-ce0d-4e88-aaae-2e0c138d049e | \n", "URN:catalog:CLO:EBIRD:OBS592831487 | \n", "Animalia | \n", "Chordata | \n", "Aves | \n", "Passeriformes | \n", "Icteridae | \n", "Quiscalus | \n", "Quiscalus quiscula | \n", "... | \n", "[] | \n", "None | \n", "CC_BY_4_0 | \n", "None | \n", "[{'array_element': 'obsr29404'}] | \n", "[] | \n", "None | \n", "2022-09-08 14:29:59.965000 | \n", "[] | \n", "[] | \n", "
4 | \n", "2152055267 | \n", "4fa7b334-ce0d-4e88-aaae-2e0c138d049e | \n", "URN:catalog:CLO:EBIRD:OBS593603038 | \n", "Animalia | \n", "Chordata | \n", "Aves | \n", "Passeriformes | \n", "Regulidae | \n", "Regulus | \n", "Regulus calendula | \n", "... | \n", "[] | \n", "None | \n", "CC_BY_4_0 | \n", "None | \n", "[{'array_element': 'obsr383648'}] | \n", "[] | \n", "None | \n", "2022-09-08 14:30:01.862000 | \n", "[] | \n", "[] | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1947843 | \n", "1674906857 | \n", "9e932f70-0c61-11dd-84ce-b8a03c50a862 | \n", "urn:lsid:slu.aqua.rom.sers:ObservedProperty:14... | \n", "Animalia | \n", "Chordata | \n", "Actinopterygii | \n", "Perciformes | \n", "Percidae | \n", "Perca | \n", "Perca fluviatilis | \n", "... | \n", "[] | \n", "None | \n", "CC0_1_0 | \n", "None | \n", "[{'array_element': 'Fiskeriverkets utredningsk... | \n", "[] | \n", "None | \n", "2022-09-25 05:23:40.367000 | \n", "[] | \n", "[{'array_element': 'COORDINATE_ROUNDED'}] | \n", "
1947844 | \n", "2012826837 | \n", "9e932f70-0c61-11dd-84ce-b8a03c50a862 | \n", "urn:lsid:slu.aqua.rom.sers:ObservedProperty:14... | \n", "Animalia | \n", "Arthropoda | \n", "Malacostraca | \n", "Decapoda | \n", "Astacidae | \n", "Astacus | \n", "Astacus astacus | \n", "... | \n", "[] | \n", "None | \n", "CC0_1_0 | \n", "None | \n", "[{'array_element': 'Fiskeriverkets utredningsk... | \n", "[] | \n", "None | \n", "2022-09-25 05:23:40.367000 | \n", "[] | \n", "[{'array_element': 'COORDINATE_ROUNDED'}] | \n", "
1947845 | \n", "1674906890 | \n", "9e932f70-0c61-11dd-84ce-b8a03c50a862 | \n", "urn:lsid:slu.aqua.rom.sers:ObservedProperty:14... | \n", "Animalia | \n", "Chordata | \n", "Actinopterygii | \n", "Salmoniformes | \n", "Salmonidae | \n", "Salmo | \n", "Salmo trutta | \n", "... | \n", "[] | \n", "None | \n", "CC0_1_0 | \n", "None | \n", "[{'array_element': 'Fiskeriverkets utredningsk... | \n", "[] | \n", "None | \n", "2022-09-25 05:23:40.367000 | \n", "[] | \n", "[{'array_element': 'COORDINATE_ROUNDED'}] | \n", "
1947846 | \n", "1675104176 | \n", "9e932f70-0c61-11dd-84ce-b8a03c50a862 | \n", "urn:lsid:slu.aqua.rom.sers:ObservedProperty:14... | \n", "Animalia | \n", "Chordata | \n", "Actinopterygii | \n", "Salmoniformes | \n", "Salmonidae | \n", "Thymallus | \n", "Thymallus thymallus | \n", "... | \n", "[] | \n", "None | \n", "CC0_1_0 | \n", "None | \n", "[{'array_element': 'Fiskeriverkets utredningsk... | \n", "[] | \n", "None | \n", "2022-09-25 05:23:40.367000 | \n", "[] | \n", "[{'array_element': 'COORDINATE_ROUNDED'}] | \n", "
1947847 | \n", "1674906836 | \n", "9e932f70-0c61-11dd-84ce-b8a03c50a862 | \n", "urn:lsid:slu.aqua.rom.sers:ObservedProperty:14... | \n", "Animalia | \n", "Chordata | \n", "Actinopterygii | \n", "Perciformes | \n", "Percidae | \n", "Perca | \n", "Perca fluviatilis | \n", "... | \n", "[] | \n", "None | \n", "CC0_1_0 | \n", "None | \n", "[{'array_element': 'Konsult'}] | \n", "[] | \n", "None | \n", "2022-09-25 05:23:40.367000 | \n", "[] | \n", "[{'array_element': 'COORDINATE_ROUNDED'}] | \n", "
1947848 rows × 50 columns
\n", "