{ "cells": [ { "cell_type": "code", "execution_count": 64, "id": "22b944c0-d8e0-478c-a3d8-ea670d90cc2f", "metadata": {}, "outputs": [], "source": [ "import yaml" ] }, { "cell_type": "code", "execution_count": 65, "id": "2607ecaa-758c-4a3e-80f2-050f2c73df8b", "metadata": {}, "outputs": [], "source": [ "# load menu\n", "with open(\"mnt/city-directories/01-user-input/menu.yml\", 'r') as f:\n", " menu = yaml.safe_load(f)" ] }, { "cell_type": "code", "execution_count": 66, "id": "a6992e26-b62f-41fc-85e7-66c95e7380e9", "metadata": {}, "outputs": [], "source": [ "if menu['all_stats']:\n", " import os\n", " import glob\n", " import math\n", " import geopandas as gpd\n", " import pandas as pd\n", " import numpy as np\n", " from io import StringIO\n", " import requests\n", " from sklearn.preprocessing import MinMaxScaler\n", " from shapely.geometry import shape\n", " from shapely.ops import unary_union\n", " import pint\n", " import folium\n", " from pathlib import Path\n", " import matplotlib.pyplot as plt\n", " import requests\n", " import re\n", " import rasterio\n", " from rasterio.mask import mask\n", " from shapely.geometry import Point\n", " from fiona.crs import from_epsg\n", " from nbconvert import MarkdownExporter\n", " import nbformat\n", " import base64\n", " import pickle\n", " import plotly.graph_objects as go\n", " import seaborn as sns\n", " import plotly.express as px" ] }, { "cell_type": "code", "execution_count": 67, "id": "0ca8d4ff-ae23-4850-95f6-4a374d77ab88", "metadata": {}, "outputs": [], "source": [ "url = \"https://raw.githubusercontent.com/compoundrisk/monitor/databricks/src/country-groups.csv\"\n", "country_groups = pd.read_csv(url)\n", "\n", "# Source helper functions\n", "helpers_url = \"https://raw.githubusercontent.com/compoundrisk/monitor/databricks/src/fns/helpers.R\"\n", "helpers_code = requests.get(helpers_url).text\n", "\n", "# Define tolatin function\n", "def tolatin(x):\n", " return stri_trans_general(x, id=\"Latin-ASCII\")\n", "\n", "# Define normalize function\n", "def normalize(x):\n", " x_min = np.min(x)\n", " x_max = np.max(x)\n", " return (x - x_min) / (x_max - x_min)\n", "\n", "def print_text(x, linebreaks=2):\n", " print(x + \"\\n\" + \"
\" * linebreaks)" ] }, { "cell_type": "code", "execution_count": 68, "id": "a5df3a99-85dd-4b6b-93d7-bbd0a68679f7", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# SET UP ##############################################\n", "\n", "# load city inputs files, to be updated for each city scan\n", "with open(\"city_inputs.yml\", 'r') as f:\n", " city_inputs = yaml.safe_load(f)\n", "\n", "city = city_inputs['city_name'].replace(' ', '_').lower()\n", "country = city_inputs['country_name'].replace(' ', '_').lower()\n", "# load global inputs, such as data sources that generally remain the same across scans\n", "with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", "\n", "# Read AOI shapefile --------\n", "# transform the input shp to correct prj (epsg 4326)\n", "aoi_file = gpd.read_file(city_inputs['AOI_path']).to_crs(epsg = 4326)\n", "features = aoi_file.geometry\n", "\n", "# Define output folder ---------\n", "output_folder = Path('mnt/city-directories/02-process-output')\n", "# Define render folder ---------\n", "render_folder = Path('mnt/city-directories/03-render-output')\n", "multi_scan_folder = Path('multi-scan-materials')\n", "\n", "if not os.path.exists(output_folder):\n", " os.mkdir(output_folder)\n", "\n", "fig, ax = plt.subplots()\n", "features.plot(ax=ax)\n", "plt.title('AOI')\n", "plt.xlabel('Longitude')\n", "plt.ylabel('Latitude')\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": 69, "id": "18954664-49f8-43b5-96c1-7faef1809401", "metadata": {}, "outputs": [], "source": [ "#AOI AREA\n", "def calculate_aoi_area(features):\n", " # Create a unit registry\n", " ureg = pint.UnitRegistry()\n", "\n", " # Combine geometries using unary_union from shapely\n", " combined_geometry = unary_union(features)\n", "\n", " # Calculate the area in square kilometers\n", " area_km2 = combined_geometry.area / 1e6 # Convert square meters to square kilometers\n", "\n", " # Print the result using the pint library for unit formatting\n", " area_quantity = area_km2 * ureg.km**2\n", " return area_quantity.to('km^2')\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 70, "id": "c1cc951b-ac81-470a-b400-dd38767a235b", "metadata": {}, "outputs": [], "source": [ "#Climate\n", "def get_koeppen_classification():\n", " # Load global inputs from YAML file\n", " with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", "\n", " # Calculate centroid of AOI\n", " centroid = features.centroid.values[0]\n", "\n", " # Retrieve centroid coordinates\n", " coords = {' Lon': centroid.x, 'Lat': centroid.y}\n", "\n", " # Read Köppen climate classification file\n", " koeppen_file_path = global_inputs.get('koeppen_source') #use an alternate dataset\n", " koeppen = pd.read_csv(koeppen_file_path)\n", "\n", " # Filter Köppen data for the region around the centroid with a buffer of 0.5 degrees\n", " lon_min, lon_max = coords[' Lon'] - 0.5, coords[' Lon'] + 0.5\n", " lat_min, lat_max = coords['Lat'] - 0.5, coords['Lat'] + 0.5\n", " koeppen_city = koeppen[\n", " (koeppen[' Lon'].between(lon_min, lon_max)) &\n", " (koeppen['Lat'].between(lat_min, lat_max))\n", " ][' Cls'].unique()\n", "\n", " # Print Köppen climate classification\n", " koeppen_text = ', '.join(koeppen_city)\n", " print(f\"Köppen climate classification: {koeppen_text} (See https://en.wikipedia.org/wiki/Köppen_climate_classification for classes)\")\n", "\n", " # Return Köppen climate classification if needed\n", " return koeppen_city\n" ] }, { "cell_type": "code", "execution_count": 71, "id": "39d2f590-64e2-46d8-8fda-62b3eb6019b3", "metadata": {}, "outputs": [], "source": [ "#Age Distribution by World Pop\n", "def age_stats():\n", " with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", "\n", " # Get city inputs\n", " city_inputs = global_inputs.get('city_inputs')\n", "\n", " if menu['demographics']: \n", " age_file = os.path.join(output_folder, f\"{city}_demographics.csv\") \n", " if os.path.exists(age_file): \n", " pass\n", " else:\n", " print(\"Demographics file does not exist.\")\n", " return \n", " pop_dist_group_wp = pd.read_csv(age_file)\n", "\n", " \n", " pop_dist_group_wp = pop_dist_group_wp.rename(columns={\"age_group\": \"Age_Bracket\", \"sex\": \"Sex\"})\n", " pop_dist_group_wp['Count'] = pd.to_numeric(pop_dist_group_wp['population'], errors='coerce')\n", " pop_dist_group_wp['Age_Bracket'] = pop_dist_group_wp['Age_Bracket'].replace({'<1': '0-4', '1-4': '0-4'})\n", " pop_dist_group_wp = pop_dist_group_wp.groupby(['Age_Bracket', 'Sex']).agg(Count=('Count', 'sum')).reset_index()\n", "\n", " \n", " pop_dist_group_wp['Percentage'] = pop_dist_group_wp.groupby('Sex')['Count'].transform(lambda x: x / x.sum())\n", " pop_dist_group_wp['Sexed_Percent'] = pop_dist_group_wp.groupby('Sex')['Count'].transform(lambda x: x / x.sum())\n", " pop_dist_group_wp['Sexed_Percent_cum'] = pop_dist_group_wp.groupby('Sex')['Sexed_Percent'].cumsum()\n", "\n", " # Plot age-sex distribution\n", " sns.barplot(data=pop_dist_group_wp, x='Age_Bracket', y='Percentage', hue='Sex', dodge=True)\n", " plt.title(f\"Population distribution in {city} by sex\")\n", " plt.xlabel(\"Age Bracket\")\n", " plt.ylabel(\"Percentage\")\n", " plt.legend(title=\"Sex\", loc=\"upper right\")\n", " plt.xticks(rotation=45)\n", " plt.tight_layout()\n", " render_path = os.path.join(render_folder, \"age_stats.png\")\n", " plt.savefig(render_path)\n", " plt.close()\n", "\n", " plt.show()\n", "\n", " # Plotly\n", " fig = px.bar(pop_dist_group_wp, x='Age_Bracket', y='Percentage', color='Sex', barmode='group', \n", " title=f\"Population distribution in {city} by sex\", \n", " labels={'Age_Bracket': 'Age Bracket', 'Percentage': 'Percentage', 'Sex': 'Sex'})\n", "\n", " \n", " fig.update_layout(xaxis_title=\"Age Bracket\", yaxis_title=\"Percentage\", legend_title=\"Sex\",plot_bgcolor='white') #swap the colors\n", " fig.update_xaxes(tickangle=45)\n", "\n", " \n", " fig.show()\n", " fig.write_html(render_path.replace('.png', '.html'))\n", " under5 = pop_dist_group_wp[pop_dist_group_wp['Age_Bracket'] == '0-4']['Percentage'].sum()\n", " youth = pop_dist_group_wp[pop_dist_group_wp['Age_Bracket'].isin(['15-19', '20-24'])]['Percentage'].sum()\n", " working_age = pop_dist_group_wp[pop_dist_group_wp['Age_Bracket'].isin(['15-64'])]['Percentage'].sum()\n", " elderly = pop_dist_group_wp[pop_dist_group_wp['Age_Bracket'].isin(['60-64', '65-69', '70-74', '75-79', '80+'])]['Percentage'].sum()\n", " female_pct = pop_dist_group_wp[pop_dist_group_wp['Sex'] == 'f']['Percentage'].sum()\n", " sex_ratio = (1 - female_pct) / female_pct * 100\n", " reproductive_age = pop_dist_group_wp[(pop_dist_group_wp['Sex'] == 'f') & (pop_dist_group_wp['Age_Bracket'].isin(['15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49']))]['Sexed_Percent'].sum()\n", "\n", " \n", " print(f\"under5: {under5:.2%}\")\n", " print(f\"youth (15-24): {youth:.2%}\")\n", " print(f\"working_age (15-64): {working_age:.2%}\")\n", " print(f\"elderly (60+): {elderly:.2%}\")\n", " print(f\"reproductive_age, percent of women (15-50): {reproductive_age:.2%}\")\n", " print(f\"sex_ratio: {round(sex_ratio, 2)} males to 100 females\")\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 72, "id": "a83777d1-ef48-4145-a841-ba3c934b99ef", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'\\ncity=\\'Mumbai\\'\\ndef oxford_age_stats():\\n with open(\"global_inputs.yml\", \\'r\\') as f:\\n global_inputs = yaml.safe_load(f)\\n\\n # Get city inputs\\n city_inputs = global_inputs.get(\\'city_inputs\\')\\n \\n# Define the \\'oxford_age_stats\\' function\\ndef oxford_age_stats(city):\\n with open(\"global_inputs.yml\", \\'r\\') as f:\\n global_inputs = yaml.safe_load(f)\\n\\n # Get city inputs\\n city_inputs = global_inputs.get(\\'city_inputs\\')\\n\\n if \\'oxford\\' in menu and menu[\\'oxford\\']: \\n oxford_data_path = os.path.join(multi_scan_folder, \"Oxford Global Cities Data.csv\")\\n if os.path.exists(oxford_data_path): \\n oxford_data = pd.read_csv(oxford_data_path)\\n indicators = oxford_data[\\'Indicator\\'].drop_duplicates()\\n pop_dist_inds = [indicator for indicator in indicators if \"Population\" in indicator and indicator not in [\"Population 0-14\", \"Population 15-64\", \"Population 65+\"]] \\n\\n if city in oxford_data[\\'Location\\'].values:\\n print(f\"{city} exists in the Oxford file.\")\\n pop_dist_structure = oxford_data.loc[(oxford_data[\\'Location\\'] == city) & (oxford_data[\\'Indicator\\'].isin(pop_dist_inds))]\\n print(pop_dist_structure.head(3))\\n pop_dist_structure[\\'Age_Bracket\\'] = pop_dist_structure[\\'Indicator\\'].str[11:19]\\n # Convert to numeric, handling errors by setting them to NaN\\n pop_dist_structure[\\'Age_Bracket\\'] = pd.to_numeric(pop_dist_structure[\\'Age_Bracket\\'], errors=\\'coerce\\')\\n pop_dist_structure[\\'Group\\'] = pd.cut(\\n pop_dist_structure[\\'Age_Bracket\\'],\\n bins=[0, 4, 14, np.inf], # Replace float(\\'inf\\') with np.inf\\n labels=[\\'Young\\', \\'Working\\', \\'65+\\']\\n )\\n \\n pop_dist_structure = pop_dist_structure.groupby([\\'Year\\', \\'Group\\']).agg(Count=(\\'Value\\', \\'sum\\')).reset_index()\\n pop_dist_structure[\\'Percent\\'] = pop_dist_structure.groupby(\\'Year\\')[\\'Count\\'].transform(lambda x: x / x.sum())\\n pop_dist_structure[\\'pct_sum\\'] = pop_dist_structure.groupby(\\'Year\\')[\\'Percent\\'].cumsum()\\n return pop_dist_structure\\n else:\\n print(f\"{city} does not exist in the Oxford file.\")\\n else:\\n print(\"Oxford file does not exist.\")\\n else:\\n print(\"Oxford is not selected in the menu.\")\\n\\n# Example usage\\noxford_age_stats(\\'Mumbai\\')\\n'" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "#Age structure Oxford Economics (If in Oxford)\n", "\n", "'''\n", "city='Mumbai'\n", "def oxford_age_stats():\n", " with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", "\n", " # Get city inputs\n", " city_inputs = global_inputs.get('city_inputs')\n", " \n", "# Define the 'oxford_age_stats' function\n", "def oxford_age_stats(city):\n", " with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", "\n", " # Get city inputs\n", " city_inputs = global_inputs.get('city_inputs')\n", "\n", " if 'oxford' in menu and menu['oxford']: \n", " oxford_data_path = os.path.join(multi_scan_folder, \"Oxford Global Cities Data.csv\")\n", " if os.path.exists(oxford_data_path): \n", " oxford_data = pd.read_csv(oxford_data_path)\n", " indicators = oxford_data['Indicator'].drop_duplicates()\n", " pop_dist_inds = [indicator for indicator in indicators if \"Population\" in indicator and indicator not in [\"Population 0-14\", \"Population 15-64\", \"Population 65+\"]] \n", "\n", " if city in oxford_data['Location'].values:\n", " print(f\"{city} exists in the Oxford file.\")\n", " pop_dist_structure = oxford_data.loc[(oxford_data['Location'] == city) & (oxford_data['Indicator'].isin(pop_dist_inds))]\n", " print(pop_dist_structure.head(3))\n", " pop_dist_structure['Age_Bracket'] = pop_dist_structure['Indicator'].str[11:19]\n", " # Convert to numeric, handling errors by setting them to NaN\n", " pop_dist_structure['Age_Bracket'] = pd.to_numeric(pop_dist_structure['Age_Bracket'], errors='coerce')\n", " pop_dist_structure['Group'] = pd.cut(\n", " pop_dist_structure['Age_Bracket'],\n", " bins=[0, 4, 14, np.inf], # Replace float('inf') with np.inf\n", " labels=['Young', 'Working', '65+']\n", " )\n", " ''''''\n", " pop_dist_structure = pop_dist_structure.groupby(['Year', 'Group']).agg(Count=('Value', 'sum')).reset_index()\n", " pop_dist_structure['Percent'] = pop_dist_structure.groupby('Year')['Count'].transform(lambda x: x / x.sum())\n", " pop_dist_structure['pct_sum'] = pop_dist_structure.groupby('Year')['Percent'].cumsum()\n", " return pop_dist_structure\n", " else:\n", " print(f\"{city} does not exist in the Oxford file.\")\n", " else:\n", " print(\"Oxford file does not exist.\")\n", " else:\n", " print(\"Oxford is not selected in the menu.\")\n", "\n", "# Example usage\n", "oxford_age_stats('Mumbai')\n", "'''\n" ] }, { "cell_type": "code", "execution_count": 11, "id": "07361797-faf5-4edc-9374-3e416c3dff56", "metadata": {}, "outputs": [], "source": [ "# Share of GDP, Emp, Pop (If in Oxford)" ] }, { "cell_type": "code", "execution_count": 12, "id": "bccb56a3-73a4-4c63-ba6a-7554098455f0", "metadata": {}, "outputs": [], "source": [ "# GDP, Pop, Emp Growth (If in Oxford)" ] }, { "cell_type": "code", "execution_count": 13, "id": "4fbd4ebe-58ff-42c7-9bb1-d7bc692ebcfe", "metadata": {}, "outputs": [], "source": [ "# GDP per capita (If in Oxford)" ] }, { "cell_type": "code", "execution_count": 14, "id": "82a89901-b37b-4440-8a70-50c288bd3700", "metadata": {}, "outputs": [], "source": [ "# Share of employment by sector (If in Oxford)" ] }, { "cell_type": "code", "execution_count": 15, "id": "976d9c30-ee1e-42ee-b3cc-881d5830a540", "metadata": {}, "outputs": [], "source": [ "#Economic Inequality (If in Oxford)" ] }, { "cell_type": "code", "execution_count": 73, "id": "7ae3cd22-889c-4859-8555-c955df2d6bfd", "metadata": {}, "outputs": [], "source": [ "def wsf_stats():\n", " # Load global inputs from YAML file\n", " with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", "\n", " # Get city inputs\n", " city_inputs = global_inputs.get('city_inputs')\n", "\n", " if menu['wsf']: \n", " wsf_stats_file = Path(output_folder / f\"{city}_wsf_stats.csv\") \n", " if os.path.exists(wsf_stats_file): \n", " pass\n", " else:\n", " print(\"WSF stats file does not exist.\")\n", " return \n", " wsf = pd.read_csv(wsf_stats_file)\n", " \n", " wsf = wsf.rename(columns={'year': 'Year'}).\\\n", " loc[:, ['Year', 'cumulative sq km']].\\\n", " rename(columns={'cumulative sq km': 'uba_km2'})\n", "\n", " \n", " wsf['growth_pct'] = (wsf['uba_km2'] / wsf['uba_km2'].shift(1) - 1)\n", " wsf['growth_km2'] = wsf['uba_km2'] - wsf['uba_km2'].shift(1)\n", "\n", " # Plot\n", " plt.figure(figsize=(8, 6))\n", " plt.plot(wsf['Year'], wsf['uba_km2'], marker='o', linestyle='-')\n", " plt.title(\"Urban Built-up Area, 1985-2015\")\n", " plt.xlabel(\"Year\")\n", " plt.ylabel(\"Urban built-up area (sq. km)\")\n", " plt.grid(True)\n", " # Save as PNG in output folder\n", " render_path = os.path.join(render_folder, \"urban_built_up_area.png\")\n", " plt.savefig(render_path)\n", " plt.close()\n", "\n", " # Create Plotly\n", " fig = go.Figure()\n", "\n", " # Add trace for urban built-up area\n", " fig.add_trace(go.Scatter(\n", " x=wsf['Year'],\n", " y=wsf['uba_km2'],\n", " mode='lines+markers',\n", " name='Urban built-up area (km^2)'\n", " ))\n", "\n", " \n", " fig.update_layout(\n", " title=\"Urban Built-up Area, 1985-2015\",\n", " xaxis_title=\"Year\",\n", " yaxis_title=\"Urban built-up area (km^2)\",\n", " template='plotly_white',\n", " showlegend=True,\n", " hovermode='x'\n", " )\n", " fig.show()\n", " fig.write_html(render_path.replace('.png', '.html'))\n", " first_area = wsf['uba_km2'].iloc[0]\n", " latest_area = wsf['uba_km2'].iloc[-1]\n", " first_year = wsf['Year'].iloc[0]\n", " latest_year = wsf['Year'].iloc[-1]\n", " pct_growth = 100 * (latest_area - first_area) / first_area\n", " print(f\"The city's built-up area grew from {round(first_area, 2)} sq. km in {first_year} to {round(latest_area, 2)} in {latest_year} for {round(pct_growth, 2)}% growth\")\n" ] }, { "cell_type": "code", "execution_count": 74, "id": "0b3913e5-3125-4d45-bca2-9b5558d82040", "metadata": {}, "outputs": [], "source": [ "#Landcover Graph\n", "def lc_stats():\n", " \n", " with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", "\n", " \n", " city_inputs = global_inputs.get('city_inputs')\n", "\n", " if menu['landcover']: \n", " lc_stats_file = Path(output_folder / f\"{city}_lc.csv\") \n", " if not lc_stats_file.exists(): \n", " print(\"Land Cover stats file does not exist.\")\n", " return \n", " \n", " \n", " lc = pd.read_csv(lc_stats_file)\n", "\n", " # Define colors\n", " lc_colors = {\n", " \"Tree cover\": \"#397e48\",\n", " \"Built-up\": \"#c4281b\",\n", " \"Grassland\": \"#88af52\",\n", " \"Bare / sparse vegetation\": \"#a59b8f\",\n", " \"Cropland\": \"#e49634\",\n", " \"Water bodies\": \"#429bdf\",\n", " \"Permanent water bodies\": \"#00008b\",#change this\n", " \"Mangroves\": \"#90EE90\",#change this\n", " \"Moss and lichen\": \"#013220\",#change this\n", " \"Shrubland\": \"#dfc25a\",\n", " \"Herbaceous wetland\": \"#7d87c4\",\n", " \"Snow and ice\": \"#F5F5F5\"\n", " }\n", " #connect it to the frontend layers.yml\n", "\n", " \n", " total_pixels = lc['Pixel Count'].sum()\n", "\n", " \n", " lc['Percentage'] = lc['Pixel Count'] / total_pixels * 100\n", "\n", " \n", " plt.figure(figsize=(8, 8))\n", " plt.pie(lc['Pixel Count'], labels=None, colors=[lc_colors[lc] for lc in lc['Land Cover Type']], startangle=90)\n", " plt.title(\"Land Cover Distribution\")\n", " \n", "\n", " \n", " render_path = os.path.join(render_folder, \"landcover.png\")\n", " plt.savefig(render_path)\n", " plt.close()\n", " \n", " \n", " fig = px.pie(lc, values='Pixel Count', names='Land Cover Type', color='Land Cover Type', color_discrete_map=lc_colors)\n", " fig.update_traces(textposition='inside', textinfo='percent+label')\n", " fig.show()\n", " fig.write_html(render_path.replace('.png', '.html'))\n", "\n", " # Dictionary to convert integers to ordinal strings\n", " ordinal_dict = {1: \"first\", 2: \"second\", 3: \"third\"}\n", "\n", " # Print the percentage of the first three highest values\n", " highest_values = lc.sort_values(by='Percentage', ascending=False).head(3)\n", " for i, (index, row) in enumerate(highest_values.iterrows()):\n", " ordinal_str = ordinal_dict.get(i + 1, str(i + 1) + \"th\")\n", " print(f\"The {ordinal_str} highest landcover value is {row['Land Cover Type']} with {row['Percentage']:.2f}% of the total land area\")\n", " \n", " \n", "\n" ] }, { "cell_type": "code", "execution_count": 75, "id": "41ad0554-e5ec-4ed7-b374-b9b3a48c6428", "metadata": {}, "outputs": [], "source": [ "#Elevation Graph\n", "def elev_stats():\n", " with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", " city_inputs = global_inputs.get('city_inputs')\n", " if menu['elevation']: \n", " elev_stats_file = Path(output_folder / f\"{city}_elevation.csv\") \n", " if not elev_stats_file.exists(): \n", " print(\"Elevation stats file does not exist.\")\n", " return \n", " elev = pd.read_csv(elev_stats_file)\n", " elev['percent'] = pd.to_numeric(elev['percent'], errors='coerce')\n", "\n", " \n", " elevation = elev.dropna(subset=['legend']).copy() # Create a copy to avoid the warning\n", "\n", " # Preprocess the data\n", " elevation['Percent'] = elevation['percent'].apply(lambda x: f\"{x:.0%}\") \n", " elevation['Elevation'] = pd.to_numeric(elevation['legend'].str.split('-').str[0], errors='coerce') \n", " elevation = elevation.dropna(subset=['Elevation']) \n", " elevation['legend'] = pd.Categorical(elevation['legend'], categories=elevation['legend'].unique()) \n", " print(elevation)\n", " # Define colors\n", " elevation_colors = {\n", " \"0-2\": \"#f5c4c0\",\n", " \"2-5\": \"#f19bb4\",\n", " \"5-10\": \"#ec5fa1\",\n", " \"10-20\": \"#c20b8a\",\n", " \"20+\": \"#762175\"\n", " }\n", " # Create the pie chart\n", " plt.figure(figsize=(8, 5))\n", " wedges, texts, autotexts = plt.pie(elevation['percent'], labels=None, colors=elevation_colors.values(), startangle=90, autopct='%1.0f%%')\n", " plt.title(\"Elevation Distribution\")\n", " plt.legend(elevation['Elevation'], loc=\"center left\", bbox_to_anchor=(1, 0, 0.5, 1))\n", "\n", " # Add percentage labels\n", " for autotext in autotexts:\n", " autotext.set_color('white')\n", " autotext.set_size(10)\n", " \n", " # Save the plot\n", " render_path = os.path.join(render_folder, \"elevation.png\")\n", " plt.savefig(render_path, bbox_inches='tight')\n", " plt.close()\n", "\n", " # Create a pie chart with percentage labels\n", " fig = px.pie(elevation, values='percent', names='Elevation', hole=0.3,\n", " color_discrete_sequence=list(elevation_colors.values()), \n", " title=\"Elevation Distribution\", \n", " labels={'percent': 'Percentage', 'Elevation': 'Elevation Range'},\n", " template='plotly_white')\n", "\n", " # Update layout\n", " fig.update_layout(legend=dict(orientation=\"h\", x=0.5, y=1.1),\n", " margin=dict(l=0, r=0, t=50, b=0))\n", " fig.show()\n", " fig.write_html(render_path.replace('.png', '.html'))\n", "\n", " # Print the percentage of land at an elevation of 20+\n", " max_percent_index = elevation['percent'].idxmax()\n", " highest_percent_row = elevation.loc[max_percent_index]\n", " print(\"Highest percentage entry for Elevation:\")\n", " print(highest_percent_row)\n", "\n", "\n", "\n", "\n", "\n", "#why are they different?" ] }, { "cell_type": "code", "execution_count": 76, "id": "be08dae0-5584-42e6-b59d-9eaa5cd4d6a7", "metadata": {}, "outputs": [], "source": [ "#Slope Graph\n", "def slope_stats():\n", " with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", " city_inputs = global_inputs.get('city_inputs')\n", " if menu['elevation']: \n", " slope_stats_file = Path(output_folder / f\"{city}_slope.csv\") \n", " if not slope_stats_file.exists(): \n", " print(\"Elevation stats file does not exist.\")\n", " return \n", " slope = pd.read_csv(slope_stats_file)\n", " slope['percent'] = pd.to_numeric(slope['percent'], errors='coerce')\n", "\n", " # Drop rows with NaN values in 'legend'\n", " slope = slope.dropna(subset=['legend']).copy()\n", "\n", " # Preprocess the data\n", " slope['Percent'] = slope['percent'].apply(lambda x: f\"{x:.0%}\")\n", " slope['Slope'] = slope['legend'].str.extract(r\"(\\d+)\").astype(float)\n", " slope['legend'] = pd.Categorical(slope['legend'], categories=slope['legend'].unique())\n", "\n", " # Define colors\n", " slope_colors = {\n", " \"0-2\": \"#ffffd4\",\n", " \"2-5\": \"#fed98e\",\n", " \"5-10\": \"#fe9929\",\n", " \"10-20\": \"#d95f0e\",\n", " \"20+\": \"#993404\"\n", " }\n", "\n", " # Plot the donut chart\n", " plt.figure(figsize=(8, 5))\n", " plt.pie(slope['percent'], labels=None, colors=slope_colors.values(), startangle=90)\n", " plt.title(\"Slope Distribution\")\n", " plt.legend(slope['Slope'], loc=\"center left\", bbox_to_anchor=(1, 0, 0.5, 1))\n", " # Save the plot\n", " render_path = os.path.join(render_folder, \"slope.png\")\n", " plt.savefig(render_path, bbox_inches='tight')\n", " plt.close()\n", "\n", " # Plotly\n", " fig = px.pie(slope, values='percent', names='Slope', hole=0.3,\n", " color_discrete_sequence=list(slope_colors.values()), \n", " title=\"Slope Distribution\", \n", " labels={'percent': 'Percentage', 'Slope': 'Slope Range'},\n", " template='plotly_white')\n", "\n", " # Update layout\n", " fig.update_layout(legend=dict(orientation=\"h\", x=0.5, y=1.1),\n", " margin=dict(l=0, r=0, t=50, b=0))\n", " fig.show()\n", " fig.write_html(render_path.replace('.png', '.html'))\n", "\n", " # Print the highest percentage value and consequent class\n", " max_percent_index = slope['percent'].idxmax()\n", " highest_percent_row = slope.loc[max_percent_index]\n", " print(\"Highest percentage entry for Slope:\")\n", " print(highest_percent_row)\n" ] }, { "cell_type": "code", "execution_count": 31, "id": "ab2851a1-315a-445d-8982-bec89bbe1bc2", "metadata": {}, "outputs": [], "source": [ "#Cyclone Graph" ] }, { "cell_type": "code", "execution_count": null, "id": "505a5d75", "metadata": {}, "outputs": [], "source": [ "#earthquake timeline\n" ] }, { "cell_type": "code", "execution_count": 77, "id": "e56dba45", "metadata": {}, "outputs": [], "source": [ "import yaml\n", "import geopandas as gpd\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from shapely.geometry import Point, LineString\n", "\n", "def flood_timeline():\n", " with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", "\n", " city_inputs = global_inputs.get('city_inputs')\n", "\n", " if menu['flood']: \n", " flood_archive_path = global_inputs.get('flood_timeline_source')\n", " flood_archive = gpd.read_file(flood_archive_path)\n", " flood_archive = flood_archive[flood_archive.is_valid]\n", " aoi = features.to_crs(flood_archive.crs)\n", " flood_archive = flood_archive[flood_archive.intersects(aoi.unary_union)]\n", " fig, ax = plt.subplots()\n", " flood_archive.plot(column='DEAD', ax=ax, legend=True)\n", "\n", " floods = flood_archive[['BEGAN', 'ENDED', 'DEAD', 'DISPLACED', 'MAINCAUSE', 'SEVERITY']]\n", "\n", " # Tally of flood events\n", " print(\"Tally of flood events\")\n", " print(floods.agg({'DEAD': 'sum', 'DISPLACED': 'sum', 'BEGAN': 'count'}))\n", "\n", " duration = (pd.to_datetime(floods['ENDED']) - pd.to_datetime(floods['BEGAN'])).dt.days\n", "\n", " # Prepare text for plotting\n", " flood_text = floods.copy()\n", " flood_text['severity'] = np.select([flood_text['SEVERITY'] == 1, flood_text['SEVERITY'] == 1.5, flood_text['SEVERITY'] == 2],\n", " ['Large event', 'Very large event', 'Extreme event'])\n", " flood_text['duration'] = duration\n", " flood_text['text'] = flood_text.apply(lambda row: f\"{row['BEGAN']}, {row['severity'].lower()} {row['MAINCAUSE']}, \"\n", " f\"{row['duration']} days, {row['DEAD']:,} fatalities, {row['DISPLACED']:,} displaced\",\n", " axis=1)\n", "\n", " # Calculate coordinates for text plotting\n", " flood_text['node_x'] = pd.to_datetime(flood_text['BEGAN']) + pd.to_timedelta(1460 * (2 * (flood_text.index % 2) - 1), unit='D')\n", " flood_text['node_y'] = 0\n", "\n", " # Prepare lines for text plotting\n", " flood_lines = pd.concat([flood_text[['BEGAN', 'node_x', 'node_y']],\n", " pd.DataFrame({'BEGAN': flood_text['BEGAN'], 'node_x': flood_text['BEGAN'], 'node_y': 0}),\n", " pd.DataFrame({'BEGAN': flood_text['BEGAN'], 'node_x': flood_text['BEGAN'], 'node_y': 0})])\n", "\n", " # Plotting\n", " fig, ax = plt.subplots(figsize=(20, 5.833))\n", " ax.plot(flood_lines['node_x'], flood_lines['node_y'], color='blue', linestyle='-')\n", " ax.scatter(flood_text['node_x'], flood_text['node_y'], color='red')\n", " for idx, row in flood_text.iterrows():\n", " ax.text(row['node_x'] + pd.to_timedelta(30, unit='D'), row['node_y'] - 50, row['text'], rotation=30, ha='center')\n", " ax.set_xlim(pd.Timestamp('1984-01-01'), pd.Timestamp('2020-12-31'))\n", " ax.set_ylim(-1800, 1800)\n", " ax.set_yticks([1825, 3650])\n", " ax.set_yticklabels([1825, 3650])\n", " plt.show()\n", "\n", "\n", "\n", "#workshop this one \n" ] }, { "cell_type": "code", "execution_count": 78, "id": "5e8fe622-51c5-4159-906f-74405ebe4101", "metadata": {}, "outputs": [], "source": [ "def extract_monthly_stats():\n", " \n", " with open(\"global_inputs.yml\", 'r') as f:\n", " global_inputs = yaml.safe_load(f)\n", "\n", " pv_directory = global_inputs.get('solar_graph_source')\n", "\n", " \n", " pv_files = [f for f in os.listdir(pv_directory) if f.endswith('.tif')]\n", "\n", " \n", " monthly_pv = []\n", "\n", " aoi = aoi_file.geometry\n", "\n", " for f in pv_files:\n", " pattern = re.compile(r'PVOUT_(\\d{2})')\n", " match = pattern.search(f)\n", " if match:\n", " m = int(match.group(1)) # Extract the month\n", "\n", " file_path = os.path.join(pv_directory, f)\n", "\n", " with rasterio.open(file_path) as src:\n", " \n", " raster_data, raster_transform = mask(src, aoi.geometry, crop=True)\n", "\n", " stats = {\n", " 'month': m,\n", " 'max': np.nanmax(raster_data),\n", " 'min': np.nanmin(raster_data),\n", " 'mean': np.nanmean(raster_data),\n", " 'sum': np.nansum(raster_data)\n", " }\n", "\n", " \n", " monthly_pv.append(stats)\n", " else:\n", " print(f\"No match found for filename: {f}\")\n", "\n", " monthly_pv_df = pd.DataFrame(monthly_pv)\n", "\n", " # Sort by 'month'\n", " monthly_pv_df.sort_values(by='month', inplace=True)\n", "\n", " # Calculate daily PV energy yield\n", " monthly_pv_df['daily'] = monthly_pv_df['mean'] \n", "\n", " \n", " highest_value = monthly_pv_df['daily'].max()\n", " lowest_value = monthly_pv_df['daily'].min()\n", "\n", " \n", " ratio = highest_value / lowest_value\n", "\n", " # Check if the ratio is greater than 2.5\n", " if ratio > 2.5:\n", " print(\"Seasonality is high, making solar energy available throughout the year\")\n", " else:\n", " print(\"Seasonality is low to moderate, making solar energy available in only some of the months\")\n", "\n", " # Plot\n", " plt.figure(figsize=(10, 6))\n", " plt.plot(monthly_pv_df['month'], monthly_pv_df['daily'], marker='o', linestyle='-')\n", " plt.text(1, 4.6, 'Excellent Conditions', color='darkgrey', verticalalignment='bottom', horizontalalignment='left')\n", " plt.text(1, 3.6, 'Favorable Conditions', color='darkgrey', verticalalignment='bottom', horizontalalignment='left')\n", " plt.xlabel('Month')\n", " plt.ylabel('Daily PV energy yield (kWh/kWp)')\n", " plt.title('Seasonal availability of solar energy')\n", " plt.xticks(np.arange(1, 13), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])\n", " plt.grid(True)\n", " plt.axhline(y=3.5, linestyle='--', color='black')\n", " plt.axhline(y=4.5, linestyle='--', color='black')\n", " plt.tight_layout()\n", " # Save the plot\n", " render_path = os.path.join(render_folder, \"_PV_graph.png\")\n", " plt.savefig(render_path, bbox_inches='tight')\n", " plt.close()\n", " #Plotly\n", " fig = px.line(monthly_pv_df, x='month', y='daily', markers=True)\n", " fig.add_annotation(x=1, y=4.6, text='Excellent Conditions', showarrow=False, font=dict(color='darkgrey'), xshift=5)\n", " fig.add_annotation(x=1, y=3.6, text='Favorable Conditions', showarrow=False, font=dict(color='darkgrey'), xshift=5)\n", " fig.update_xaxes(title='Month', tickvals=list(range(1, 13)), ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])\n", " fig.update_yaxes(title='Daily PV energy yield (kWh/kWp)', range=[0, 5])\n", " fig.add_shape(type=\"line\", x0=1, y0=3.5, x1=12, y1=3.5, line=dict(color=\"black\", width=1, dash='dash'))\n", " fig.add_shape(type=\"line\", x0=1, y0=4.5, x1=12, y1=4.5, line=dict(color=\"black\", width=1, dash='dash'))\n", " fig.update_layout(title='Seasonal availability of solar energy', xaxis=dict(showgrid=True, zeroline=False),plot_bgcolor='white')\n", "\n", " fig.show()\n", " fig.write_html(render_path.replace('.png', '.html'))\n", "\n" ] }, { "cell_type": "code", "execution_count": 79, "id": "5e48c01f-f509-4137-8d5b-78a7f336dbf6", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/ipshitakarmakar/mambaforge/envs/geo/share/jupyter/nbconvert/templates/base/display_priority.j2:32: UserWarning:\n", "\n", "Your element with mimetype(s) dict_keys(['application/vnd.plotly.v1+json']) is not able to be represented.\n", "\n" ] } ], "source": [ "#Save output\n", "def export_outputs_to_markdown(notebook_path, output_path):\n", " # Load the notebook\n", " with open(notebook_path, 'r', encoding='utf-8') as f:\n", " notebook_content = nbformat.read(f, as_version=4)\n", " \n", " # Initialize the Markdown exporter\n", " markdown_exporter = MarkdownExporter()\n", " markdown_exporter.exclude_input = True # Exclude input cells from the Markdown\n", " \n", " # Convert the notebook to Markdown format\n", " markdown_output, resources = markdown_exporter.from_notebook_node(notebook_content)\n", " \n", " # Remove the folders for plots and pickles from the resources\n", " resources.pop('outputs', None)\n", " resources.pop('output_files', None)\n", " \n", " # Write the Markdown content to a file\n", " with open(output_path, 'w', encoding='utf-8') as f:\n", " f.write(markdown_output)\n", "\n", "# Path to the input notebook\n", "input_notebook_path = \"/Users/ipshitakarmakar/Documents/GitHub/city-scan-automation/scan_assembly.ipynb\"\n", "\n", "# Path for the output Markdown file\n", "output_markdown_path = \"/Users/ipshitakarmakar/Documents/GitHub/city-scan-automation/output_notebook.md\"\n", " \n", "# Export the outputs to Markdown\n", "export_outputs_to_markdown(input_notebook_path, output_markdown_path)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 5 }