{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import geopandas as gpd\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import requests\n",
"import yaml\n",
"\n",
"from geopandas.tools import sjoin\n",
"import plotly.graph_objects as go"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the parameters used to populate the iNaturalist API request from the YAML configuration file `params.yml`"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# API docs in https://www.inaturalist.org/pages/api+reference\n",
"# Read the settings inside a context manager so the file handle is closed\n",
"# again; safe_load only builds plain Python objects from the YAML.\n",
"with open('params.yml') as params_file:\n",
"    params = yaml.safe_load(params_file)\n",
"\n",
"# Query parameters handed to get_data(); note that the 'url' key is part of\n",
"# this dict too, so it is also sent along as a query parameter.\n",
"api_params = params['api']\n",
"# Source of the polygons that the observations are joined against\n",
"poly_endpoint = params['join']['url']\n",
"# Endpoint requested by get_data()\n",
"api_baseurl = params['api']['url']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load polygons to be intersected with iNaturalist point data (ideally endpoint serves GeoJSON)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Read the polygon layer from the configured endpoint\n",
"dfpoly = gpd.read_file(filename=poly_endpoint)"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"Define the function that fetches the data from the iNaturalist REST API, page by page"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def get_data(api_params=None):\n",
"    \"\"\"Fetch all result pages from the iNaturalist API into one DataFrame.\n",
"\n",
"    Pages are requested one after another, starting at ``api_params['page']``\n",
"    (1 when absent), until a page holds a different number of records than\n",
"    ``api_params['per_page']``. Without ``per_page`` only the first page is\n",
"    fetched, because a full page cannot be recognised.\n",
"\n",
"    Args:\n",
"        api_params: Query parameters for the request. The dict is copied, so\n",
"            the caller's ``page`` value is left untouched and the call can be\n",
"            repeated with the same result.\n",
"\n",
"    Returns:\n",
"        DataFrame with the columns ``latitude``, ``longitude``, ``taxon.id``,\n",
"        ``taxon.name`` and ``taxon.preferred_common_name``. Row labels restart\n",
"        on every page because the pages are concatenated as they are.\n",
"\n",
"    Raises:\n",
"        Exception: If the API answers with a status code other than 200.\n",
"    \"\"\"\n",
"    columns = ['latitude', 'longitude', 'taxon.id', 'taxon.name',\n",
"               'taxon.preferred_common_name']\n",
"    # Work on a copy so the caller's page counter is never advanced\n",
"    query = dict(api_params or {})\n",
"    query.setdefault('page', 1)\n",
"    per_page = query.get('per_page')\n",
"\n",
"    pages = []\n",
"    while True:\n",
"        print(\"⏳ Fetch iNaturalist data from API... page %d\" % query['page'])\n",
"        r = requests.get(api_baseurl, params=query, headers={\n",
"            'Content-Type': 'application/json'})\n",
"        if r.status_code != 200:\n",
"            raise Exception(\n",
"                'Yikes - iNaturalist API error: {}'.format(\n",
"                    r.status_code)\n",
"            )\n",
"        page_df = pd.json_normalize(r.json(), record_path=['results'])\n",
"        if page_df.empty:\n",
"            # Reached when the total is an exact multiple of per_page: the\n",
"            # extra request returns no records and has no 'location' column.\n",
"            break\n",
"        # Extract lat-lon and keep critical columns only to speed things up\n",
"        page_df[['latitude', 'longitude']] = page_df['location'].str.split(\n",
"            ',', n=1, expand=True)\n",
"        # reindex tolerates a page in which no record carries an optional field\n",
"        pages.append(page_df.reindex(columns=columns))\n",
"        if len(page_df.index) != per_page:  # short page: nothing left to fetch\n",
"            break\n",
"        query['page'] += 1\n",
"\n",
"    if not pages:\n",
"        return pd.DataFrame(columns=columns)\n",
"    return pd.concat(pages)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"⏳ Fetch iNaturalist data from API... page 1\n",
"⏳ Fetch iNaturalist data from API... page 2\n",
"⏳ Fetch iNaturalist data from API... page 3\n",
"⏳ Fetch iNaturalist data from API... page 4\n",
"⏳ Fetch iNaturalist data from API... page 5\n",
"⏳ Fetch iNaturalist data from API... page 6\n",
"⏳ Fetch iNaturalist data from API... page 7\n",
"⏳ Fetch iNaturalist data from API... page 8\n",
"⏳ Fetch iNaturalist data from API... page 9\n",
"⏳ Fetch iNaturalist data from API... page 10\n",
"⏳ Fetch iNaturalist data from API... page 11\n",
"⏳ Fetch iNaturalist data from API... page 12\n",
"⏳ Fetch iNaturalist data from API... page 13\n",
"⏳ Fetch iNaturalist data from API... page 14\n",
"⏳ Fetch iNaturalist data from API... page 15\n",
"⏳ Fetch iNaturalist data from API... page 16\n",
"⏳ Fetch iNaturalist data from API... page 17\n",
"⏳ Fetch iNaturalist data from API... page 18\n",
"✔ 3480 records downloaded from the iNaturalist API\n"
]
}
],
"source": [
"# Download every page of observations, then report how many rows arrived\n",
"df = get_data(api_params=api_params)\n",
"print(\"✔ %d records downloaded from the iNaturalist API\" % len(df))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Spatially join the observations with the polygons (a left join, so observations outside every polygon are kept) and prepare the data used in the plots below"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Turn every observation into a point; EPSG:4326 is plain longitude/latitude\n",
"gdf = gpd.GeoDataFrame(\n",
"    df,\n",
"    geometry=gpd.points_from_xy(df.longitude, df.latitude),\n",
"    crs='EPSG:4326',\n",
")\n",
"# Left join: observations that match no polygon are kept, with the polygon\n",
"# columns left empty.\n",
"pointInPolys = sjoin(gdf, dfpoly, how='left')\n",
"\n",
"def calc_within_area(row):\n",
"    \"\"\"Return 'Outside' when the row has no OBJECTID, otherwise 'Inside'.\"\"\"\n",
"    return ('Outside' if pd.isnull(row['OBJECTID']) else 'Inside')\n",
"\n",
"pointInPolys['state'] = pointInPolys.apply(calc_within_area, axis=1)\n",
"# Number of observations per taxon, split by inside/outside\n",
"dftaxon = pointInPolys.groupby(['taxon.id','state']).size().reset_index(name='counts')\n",
"# Comparing against pd.NA is not a missing-value test; notna() keeps only the\n",
"# rows whose polygon Name is present.\n",
"subdf = pointInPolys.loc[pointInPolys['Name'].notna()]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"# Route pandas plotting calls through plotly\n",
"pd.set_option('plotting.backend', 'plotly')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"alignmentgroup": "True",
"hovertemplate": "=%{x}
# Observations=%{y}