{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Check `GDS` Python stack\n",
    "\n",
    "This notebook checks that all software requirements for the course Geographic Data Science are correctly installed.\n",
    "\n",
    "A successful run of the notebook means that every cell executes without returning an error. This ensures the environment is correctly installed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "! echo \"You are running version $GDS_ENV_VERSION of the GDS env\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pull list of all libs specified in the stack:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "# YAML file listing the packages of the stack (presumably a conda environment spec)\n",
    "env_file = './gds_py.yml'\n",
    "# Package names (dashes already turned into underscores) whose import name differs\n",
    "bespoke = {\n",
    "    'h3_py': 'h3',\n",
    "    'python_duckdb': 'duckdb',\n",
    "    'scikit_learn': 'sklearn',\n",
    "    'scikit_mobility': 'skmob',\n",
    "    'xarray_spatial': 'xrspatial'\n",
    "}\n",
    "# Entries left out of the generic import loop (`cenpy` and `protobuf` are\n",
    "# imported in their own cells further down)\n",
    "skip = {'conda-forge', 'libgdal-arrow-parquet', 'pip', 'cenpy', 'protobuf'}\n",
    "\n",
    "libs = []\n",
    "with open(env_file, 'r') as fo:\n",
    "    for line in fo:\n",
    "        # Keep plain list items only: no commented lines, no `key: value` lines\n",
    "        if ('- ' in line) and ('#' not in line) and (':' not in line):\n",
    "            name = line.strip('\\n').strip(' ').strip('-').strip(' ')\n",
    "            # Drop any version constraint (`>=`, `==`, `=`, `<`, `~=`...), not only `>=`\n",
    "            name = re.split(r'[<>=!~]', name, maxsplit=1)[0].strip()\n",
    "            libs.append(name)\n",
    "libs = [i for i in libs if i not in skip]\n",
    "# Import names use underscores where package names use dashes\n",
    "libs = [i.replace('-', '_') for i in libs]\n",
    "libs = [bespoke.get(i, i) for i in libs]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Import each library:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from importlib import import_module\n",
    "for lib in libs:\n",
    "    print(f'Attempting to import {lib}')\n",
    "    import_module(lib)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Bespoke imports:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- `pysal`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pysal\n",
    "import pysal.lib\n",
    "import pysal.explore\n",
    "import pysal.model\n",
    "import pysal.viz"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- Protobuf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from google.protobuf.internal import api_implementation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- `cenpy`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Issues with the Census API: a failing import is reported with its\n",
    "# traceback instead of stopping the notebook\n",
    "import traceback\n",
    "try:\n",
    "    import cenpy\n",
    "except Exception:\n",
    "    traceback.print_exc()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hardcoded tests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import geopandas\n",
    "import geopandas as gpd\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pysal import lib as libps\n",
    "shp = libps.examples.get_path('columbus.shp')\n",
    "db = geopandas.read_file(shp)\n",
    "db.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Round trip through Feather, then remove the temporary file\n",
    "db[['AREA', 'PERIMETER']].to_feather('db.feather')\n",
    "tst = pd.read_feather('db.feather')\n",
    "Path('db.feather').unlink()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Round trip through (Geo)Parquet, then remove the temporary file\n",
    "db.to_parquet('db.pq')\n",
    "tst = gpd.read_parquet('db.pq')\n",
    "Path('db.pq').unlink()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "fig, ax = plt.subplots(1)\n",
    "db.plot(facecolor='yellow', ax=ax)\n",
    "ax.set_axis_off()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assign a CRS so the table can be reprojected in the next cell\n",
    "db.crs = 'EPSG:26918'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "db_wgs84 = db.to_crs(epsg=4326)\n",
    "db_wgs84.plot()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pysal.viz import splot\n",
    "from splot.mapping import vba_choropleth\n",
    "\n",
    "fig, ax = vba_choropleth(db['INC'], db['HOVAL'], db)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "db.plot(column='INC', scheme='fisher_jenks', cmap='Blues')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import osmnx\n",
    "city = osmnx.geocode_to_gdf('Berkeley, California, US')\n",
    "osmnx.plot_footprints(osmnx.project_gdf(city));"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```python\n",
    "import os\n",
    "os.environ['USE_PYGEOS'] = '1'\n",
    "import geopandas\n",
    "import pyrosm\n",
    "from pyrosm import get_data\n",
    "# Download data for the city of Helsinki\n",
    "fp = get_data(\"Helsinki\", directory=\"./\")\n",
    "print(fp)\n",
    "\n",
    "# Get filepath to test PBF dataset\n",
    "fp = pyrosm.get_data(\"test_pbf\")\n",
    "print(\"Filepath to test data:\", fp)\n",
    "\n",
    "# Initialize the OSM object\n",
    "osm = pyrosm.OSM(fp)\n",
    "\n",
    "# See the type\n",
    "print(\"Type of 'osm' instance: \", type(osm))\n",
    "\n",
    "from pyrosm import get_data\n",
    "\n",
    "# Pyrosm comes with a couple of test datasets\n",
    "# that can be used straight away without\n",
    "# downloading anything\n",
    "fp = get_data(\"test_pbf\")\n",
    "\n",
    "# Initialize the OSM parser object\n",
    "osm = pyrosm.OSM(fp)\n",
    "\n",
    "# Read all drivable roads\n",
    "# =======================\n",
    "drive_net = osm.get_network(network_type=\"driving\")\n",
    "drive_net.plot()\n",
    "\n",
    "! rm Helsinki.osm.pbf\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import rasterio as rio\n",
    "import contextily as ctx\n",
    "import pysal as ps\n",
    "tl = ctx.providers.CartoDB.Positron\n",
    "\n",
    "db = geopandas.read_file(ps.lib.examples.get_path('us48.shp'))\n",
    "db.crs = \"EPSG:4326\"\n",
    "dbp = db.to_crs(epsg=3857)\n",
    "w, s, e, n = dbp.total_bounds\n",
    "# Download raster\n",
    "_ = ctx.bounds2raster(w, s, e, n, 'us.tif', source=tl)\n",
    "# Load up and plot; the `with` block closes the file handle so that\n",
    "# `us.tif` can be removed safely later on\n",
    "with rio.open('us.tif', 'r') as source:\n",
    "    red = source.read(1)\n",
    "    green = source.read(2)\n",
    "    blue = source.read(3)\n",
    "    # Order is (left, right, bottom, top), as passed to `extent` below\n",
    "    bounds = (source.bounds.left, source.bounds.right,\n",
    "              source.bounds.bottom, source.bounds.top)\n",
    "pix = np.dstack((red, green, blue))\n",
    "fig = plt.figure(figsize=(6, 6))\n",
    "img = plt.imshow(pix, extent=bounds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ax = db.plot()\n",
    "ctx.add_basemap(ax, crs=db.crs.to_string())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ipyleaflet import Map, basemaps, basemap_to_tiles, SplitMapControl\n",
    "\n",
    "m = Map(center=(42.6824, 365.581), zoom=5)\n",
    "\n",
    "right_layer = basemap_to_tiles(basemaps.NASAGIBS.ModisTerraTrueColorCR, \"2017-11-11\")\n",
    "left_layer = basemap_to_tiles(basemaps.NASAGIBS.ModisAquaBands721CR, \"2017-11-11\")\n",
    "\n",
    "control = SplitMapControl(left_layer=left_layer, right_layer=right_layer)\n",
    "m.add_control(control)\n",
    "\n",
    "m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert us.tif to COG with rio-cogeo\n",
    "! rio cogeo create us.tif us_cog.tif\n",
    "! rio cogeo validate us_cog.tif\n",
    "! rio info us_cog.tif\n",
    "! rio warp --dst-crs EPSG:4326 us_cog.tif us_cog_ll.tif"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# rioxarray + xarray_leaflet\n",
    "import xarray\n",
    "import rioxarray\n",
    "import xarray_leaflet\n",
    "from ipyleaflet import Map, basemaps\n",
    "\n",
    "us = rioxarray.open_rasterio(\n",
    "    \"us_cog.tif\"\n",
    ").sel(\n",
    "    band=1,\n",
    "    y=slice(7000000, 5000000),\n",
    "    x=slice(-10000000, -8000000)\n",
    ")\n",
    "\n",
    "m = Map(zoom=1, basemap=basemaps.CartoDB.DarkMatter)\n",
    "m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "layer = us.astype(\n",
    "    float\n",
    ").rio.reproject(\n",
    "    \"EPSG:4326\"\n",
    ").leaflet.plot(m)\n",
    "layer.interact(opacity=(0.0,1.0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clean up the rasters and the `cache/` folder left behind by the cells above\n",
    "import shutil\n",
    "from pathlib import Path\n",
    "\n",
    "for tif in ('us.tif', 'us_cog.tif', 'us_cog_ll.tif'):\n",
    "    Path(tif).unlink(missing_ok=True)\n",
    "shutil.rmtree('cache', ignore_errors=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from IPython.display import GeoJSON\n",
    "\n",
    "GeoJSON({\n",
    "    \"type\": \"Feature\",\n",
    "    \"geometry\": {\n",
    "        \"type\": \"Point\",\n",
    "        \"coordinates\": [-118.4563712, 34.0163116]\n",
    "    }\n",
    "})"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}