{
"cells": [
{
"cell_type": "code",
"execution_count": 64,
"id": "22b944c0-d8e0-478c-a3d8-ea670d90cc2f",
"metadata": {},
"outputs": [],
"source": [
"import yaml"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "2607ecaa-758c-4a3e-80f2-050f2c73df8b",
"metadata": {},
"outputs": [],
"source": [
"# Read the user's menu of analysis switches (e.g. menu['all_stats'], menu['demographics']).\n",
"menu_path = \"mnt/city-directories/01-user-input/menu.yml\"\n",
"with open(menu_path, mode='r') as menu_stream:\n",
"    menu = yaml.safe_load(menu_stream)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "a6992e26-b62f-41fc-85e7-66c95e7380e9",
"metadata": {},
"outputs": [],
"source": [
"if menu['all_stats']:\n",
" import os\n",
" import glob\n",
" import math\n",
" import geopandas as gpd\n",
" import pandas as pd\n",
" import numpy as np\n",
" from io import StringIO\n",
" import requests\n",
" from sklearn.preprocessing import MinMaxScaler\n",
" from shapely.geometry import shape\n",
" from shapely.ops import unary_union\n",
" import pint\n",
" import folium\n",
" from pathlib import Path\n",
" import matplotlib.pyplot as plt\n",
" import requests\n",
" import re\n",
" import rasterio\n",
" from rasterio.mask import mask\n",
" from shapely.geometry import Point\n",
" from fiona.crs import from_epsg\n",
" from nbconvert import MarkdownExporter\n",
" import nbformat\n",
" import base64\n",
" import pickle\n",
" import plotly.graph_objects as go\n",
" import seaborn as sns\n",
" import plotly.express as px"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "0ca8d4ff-ae23-4850-95f6-4a374d77ab88",
"metadata": {},
"outputs": [],
"source": [
"# Country-group lookup table, read directly from the compoundrisk/monitor repository.\n",
"url = \"https://raw.githubusercontent.com/compoundrisk/monitor/databricks/src/country-groups.csv\"\n",
"country_groups = pd.read_csv(url)\n",
"\n",
"# Download the text of the repository's helper script.\n",
"# NOTE(review): helpers.R is R source, so it is only kept as text here; nothing in this cell runs it.\n",
"helpers_url = \"https://raw.githubusercontent.com/compoundrisk/monitor/databricks/src/fns/helpers.R\"\n",
"helpers_response = requests.get(helpers_url, timeout=30)  # never wait forever on a stalled connection\n",
"helpers_response.raise_for_status()  # surface HTTP errors instead of storing an error page\n",
"helpers_code = helpers_response.text\n",
"\n",
"# Define tolatin function\n",
"def tolatin(x):\n",
"    \"\"\"Transliterate text to plain ASCII by removing diacritics.\n",
"\n",
"    Python stand-in for R's ``stri_trans_general(x, id=\"Latin-ASCII\")``, which is\n",
"    not defined in Python. Accented letters are decomposed (NFKD) and the\n",
"    combining marks dropped, e.g. \"São Paulo\" -> \"Sao Paulo\". Characters that\n",
"    have no ASCII decomposition (e.g. \"ø\", \"ß\") are removed rather than\n",
"    transliterated, so this only approximates the ICU transform.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x : str, list, tuple, or object with ``.map`` (e.g. a pandas Series)\n",
"        Text to convert. Non-string values are returned unchanged.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The same kind of object as ``x`` with every string converted to ASCII.\n",
"    \"\"\"\n",
"    import unicodedata  # local import: only this helper needs it\n",
"\n",
"    def _to_ascii(value):\n",
"        if not isinstance(value, str):\n",
"            return value\n",
"        decomposed = unicodedata.normalize(\"NFKD\", value)\n",
"        return decomposed.encode(\"ascii\", \"ignore\").decode(\"ascii\")\n",
"\n",
"    if isinstance(x, (list, tuple)):\n",
"        return type(x)(_to_ascii(item) for item in x)\n",
"    if hasattr(x, \"map\"):\n",
"        return x.map(_to_ascii)\n",
"    return _to_ascii(x)\n",
"\n",
"# Define normalize function\n",
"def normalize(x):\n",
"    \"\"\"Min-max scale ``x`` so its smallest value maps to 0 and its largest to 1.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x : array-like supporting elementwise arithmetic (NumPy array, pandas Series)\n",
"\n",
"    Returns\n",
"    -------\n",
"    Object of the same kind as ``x``. When every value is identical the range\n",
"    is zero, so zeros are returned instead of the NaNs that 0/0 would produce.\n",
"    \"\"\"\n",
"    x_min = np.min(x)\n",
"    x_max = np.max(x)\n",
"    value_range = x_max - x_min\n",
"    if value_range == 0:\n",
"        # Constant input: avoid dividing by zero; x - x_min is already all zeros.\n",
"        return x - x_min\n",
"    return (x - x_min) / value_range\n",
"\n",
"def print_text(x, linebreaks=2):\n",
"    \"\"\"Print ``x`` followed by a newline and ``linebreaks`` HTML ``<br>`` tags.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x : str\n",
"        Text to print.\n",
"    linebreaks : int, default 2\n",
"        Number of ``<br>`` tags appended after the text.\n",
"\n",
"    NOTE(review): the repeated literal is assumed to be ``<br>``; confirm that\n",
"    this is the separator the rendered output expects.\n",
"    \"\"\"\n",
"    print(x + \"\\n\" + \"<br>\" * linebreaks)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "a5df3a99-85dd-4b6b-93d7-bbd0a68679f7",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# SET UP ##############################################\n",
"\n",
"# load city inputs files, to be updated for each city scan\n",
"with open(\"city_inputs.yml\", 'r') as f:\n",
"    city_inputs = yaml.safe_load(f)\n",
"\n",
"# Lower-case, underscore-separated names; `city` is used to build file names in later cells\n",
"city = city_inputs['city_name'].replace(' ', '_').lower()\n",
"country = city_inputs['country_name'].replace(' ', '_').lower()\n",
"# load global inputs, such as data sources that generally remain the same across scans\n",
"with open(\"global_inputs.yml\", 'r') as f:\n",
"    global_inputs = yaml.safe_load(f)\n",
"\n",
"# Read AOI shapefile --------\n",
"# transform the input shp to correct prj (epsg 4326)\n",
"aoi_file = gpd.read_file(city_inputs['AOI_path']).to_crs(epsg = 4326)\n",
"features = aoi_file.geometry\n",
"\n",
"# Define output folder ---------\n",
"output_folder = Path('mnt/city-directories/02-process-output')\n",
"# Define render folder ---------\n",
"render_folder = Path('mnt/city-directories/03-render-output')\n",
"multi_scan_folder = Path('multi-scan-materials')\n",
"\n",
"# Create both writable folders, including missing parents. os.mkdir would fail on a\n",
"# fresh checkout, and later cells save figures into render_folder.\n",
"for folder in (output_folder, render_folder):\n",
"    folder.mkdir(parents=True, exist_ok=True)\n",
"\n",
"# Quick visual check of the AOI geometry\n",
"fig, ax = plt.subplots()\n",
"features.plot(ax=ax)\n",
"ax.set_title('AOI')\n",
"ax.set_xlabel('Longitude')\n",
"ax.set_ylabel('Latitude')\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "18954664-49f8-43b5-96c1-7faef1809401",
"metadata": {},
"outputs": [],
"source": [
"#AOI AREA\n",
"def calculate_aoi_area(features):\n",
"    \"\"\"Return the area of the merged AOI geometries in square kilometres.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    features : geopandas.GeoSeries or iterable of shapely geometries\n",
"        AOI geometries. A GeoSeries in a geographic CRS (such as EPSG:4326) is\n",
"        reprojected to the equal-area CRS EPSG:6933 first, because shapely\n",
"        reports area in squared CRS units and square degrees are not a usable\n",
"        area. Any other input is assumed to be in metres already.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pint.Quantity\n",
"        The area expressed in km^2.\n",
"    \"\"\"\n",
"    # Create a unit registry\n",
"    ureg = pint.UnitRegistry()\n",
"\n",
"    crs = getattr(features, 'crs', None)\n",
"    if crs is not None and crs.is_geographic:\n",
"        features = features.to_crs(epsg=6933)\n",
"\n",
"    # Merge all features so overlapping parts are counted once\n",
"    combined_geometry = unary_union(features)\n",
"\n",
"    # Square metres -> square kilometres\n",
"    area_km2 = combined_geometry.area / 1e6\n",
"\n",
"    area_quantity = area_km2 * ureg.km**2\n",
"    return area_quantity.to('km^2')\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "c1cc951b-ac81-470a-b400-dd38767a235b",
"metadata": {},
"outputs": [],
"source": [
"#Climate\n",
"def get_koeppen_classification():\n",
"    \"\"\"Look up the Köppen climate classes found around the AOI centroid.\n",
"\n",
"    Reads the file named by 'koeppen_source' in global_inputs.yml as CSV, keeps\n",
"    the rows within 0.5 degrees of the centroid of the module-level `features`\n",
"    geometries, prints the distinct classes and returns them.\n",
"\n",
"    Returns\n",
"    -------\n",
"    numpy.ndarray\n",
"        Unique values of the ' Cls' column near the centroid; an empty array\n",
"        when no source is configured.\n",
"    \"\"\"\n",
"    # Load global inputs from YAML file\n",
"    with open(\"global_inputs.yml\", 'r') as f:\n",
"        global_inputs = yaml.safe_load(f)\n",
"\n",
"    # TODO: use an alternate dataset\n",
"    koeppen_file_path = global_inputs.get('koeppen_source')\n",
"    if not koeppen_file_path:\n",
"        # Say what is missing rather than letting pd.read_csv(None) fail obscurely.\n",
"        print(\"No 'koeppen_source' entry found in global_inputs.yml.\")\n",
"        return np.array([], dtype=object)\n",
"\n",
"    # Centroid of the whole AOI (all features merged), not only of the first feature\n",
"    centroid = unary_union(features).centroid\n",
"    lon, lat = centroid.x, centroid.y\n",
"\n",
"    # Read Köppen climate classification file\n",
"    koeppen = pd.read_csv(koeppen_file_path)\n",
"\n",
"    # Keep the cells within 0.5 degrees of the centroid.\n",
"    # NOTE(review): ' Lon' and ' Cls' carry a leading space - presumably mirroring the\n",
"    # header of the source file; confirm against the dataset.\n",
"    nearby = (koeppen[' Lon'].between(lon - 0.5, lon + 0.5) &\n",
"              koeppen['Lat'].between(lat - 0.5, lat + 0.5))\n",
"    koeppen_city = koeppen.loc[nearby, ' Cls'].unique()\n",
"\n",
"    # Print Köppen climate classification (str() keeps the join safe for non-string cells)\n",
"    koeppen_text = ', '.join(str(cls) for cls in koeppen_city)\n",
"    print(f\"Köppen climate classification: {koeppen_text} (See https://en.wikipedia.org/wiki/Köppen_climate_classification for classes)\")\n",
"\n",
"    return koeppen_city\n"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "39d2f590-64e2-46d8-8fda-62b3eb6019b3",
"metadata": {},
"outputs": [],
"source": [
"#Age Distribution by World Pop\n",
"def _prepare_age_sex_table(raw_demographics):\n",
"    \"\"\"Aggregate raw demographics rows into one row per age bracket and sex.\n",
"\n",
"    Expects the columns 'age_group', 'sex' and 'population'. The '<1' and '1-4'\n",
"    brackets are merged into '0-4'. Returns a DataFrame with the columns\n",
"    'Age_Bracket', 'Sex', 'Count', 'Percentage' (share of the whole population),\n",
"    'Sexed_Percent' (share within the row's sex) and 'Sexed_Percent_cum'\n",
"    (running total of 'Sexed_Percent' from youngest to oldest bracket).\n",
"    \"\"\"\n",
"    table = raw_demographics.rename(columns={\"age_group\": \"Age_Bracket\", \"sex\": \"Sex\"})\n",
"    table['Count'] = pd.to_numeric(table['population'], errors='coerce')\n",
"    table['Age_Bracket'] = table['Age_Bracket'].replace({'<1': '0-4', '1-4': '0-4'})\n",
"    table = table.groupby(['Age_Bracket', 'Sex']).agg(Count=('Count', 'sum')).reset_index()\n",
"\n",
"    # groupby sorts brackets as text ('10-14' before '5-9'); reorder by the numeric\n",
"    # lower bound so the chart and the cumulative share run youngest to oldest.\n",
"    lower_bound = table['Age_Bracket'].astype(str).str.extract(r'(\\d+)')[0].astype(float)\n",
"    table = (table.assign(_lower=lower_bound)\n",
"                  .sort_values(['_lower', 'Sex'], kind='stable')\n",
"                  .drop(columns='_lower')\n",
"                  .reset_index(drop=True))\n",
"\n",
"    # 'Percentage' is the share of the total population; computing it within each sex\n",
"    # would make every sex sum to 100% and break the sex ratio.\n",
"    table['Percentage'] = table['Count'] / table['Count'].sum()\n",
"    table['Sexed_Percent'] = table.groupby('Sex')['Count'].transform(lambda x: x / x.sum())\n",
"    table['Sexed_Percent_cum'] = table.groupby('Sex')['Sexed_Percent'].cumsum()\n",
"    return table\n",
"\n",
"\n",
"def _summarize_age_sex_table(table):\n",
"    \"\"\"Return the headline shares printed by age_stats as a dict of floats.\"\"\"\n",
"    def five_year(start, stop):\n",
"        # Labels of the five-year brackets whose lower bounds are start, start+5, ... < stop\n",
"        return [f\"{low}-{low + 4}\" for low in range(start, stop, 5)]\n",
"\n",
"    def share(brackets, column='Percentage', sex=None):\n",
"        rows = table['Age_Bracket'].isin(brackets)\n",
"        if sex is not None:\n",
"            rows = rows & (table['Sex'] == sex)\n",
"        return table.loc[rows, column].sum()\n",
"\n",
"    female_pct = table.loc[table['Sex'] == 'f', 'Percentage'].sum()\n",
"    return {\n",
"        'under5': share(['0-4']),\n",
"        'youth': share(five_year(15, 25)),\n",
"        # 15-64 is the union of the five-year brackets 15-19 ... 60-64\n",
"        'working_age': share(five_year(15, 65)),\n",
"        'elderly': share(['60-64', '65-69', '70-74', '75-79', '80+']),\n",
"        'reproductive_age': share(five_year(15, 50), column='Sexed_Percent', sex='f'),\n",
"        'sex_ratio': (1 - female_pct) / female_pct * 100 if female_pct else float('nan'),\n",
"    }\n",
"\n",
"\n",
"def age_stats():\n",
"    \"\"\"Chart the city's age/sex distribution and print headline shares.\n",
"\n",
"    Runs only when menu['demographics'] is truthy. Reads '<city>_demographics.csv'\n",
"    from `output_folder`, saves a bar chart as 'age_stats.png' and an interactive\n",
"    version as 'age_stats.html' in `render_folder`, then prints the under-5,\n",
"    youth, working-age, elderly and reproductive-age shares and the sex ratio.\n",
"    Returns None.\n",
"    \"\"\"\n",
"    if not menu['demographics']:\n",
"        return\n",
"    age_file = os.path.join(output_folder, f\"{city}_demographics.csv\")\n",
"    if not os.path.exists(age_file):\n",
"        print(\"Demographics file does not exist.\")\n",
"        return\n",
"    pop_dist_group_wp = _prepare_age_sex_table(pd.read_csv(age_file))\n",
"\n",
"    os.makedirs(render_folder, exist_ok=True)\n",
"    render_path = os.path.join(render_folder, \"age_stats.png\")\n",
"\n",
"    # Plot age-sex distribution (static image)\n",
"    static_fig, ax = plt.subplots()\n",
"    sns.barplot(data=pop_dist_group_wp, x='Age_Bracket', y='Percentage', hue='Sex', dodge=True, ax=ax)\n",
"    ax.set_title(f\"Population distribution in {city} by sex\")\n",
"    ax.set_xlabel(\"Age Bracket\")\n",
"    ax.set_ylabel(\"Percentage\")\n",
"    ax.legend(title=\"Sex\", loc=\"upper right\")\n",
"    ax.tick_params(axis='x', labelrotation=45)\n",
"    static_fig.tight_layout()\n",
"    static_fig.savefig(render_path)\n",
"    plt.close(static_fig)\n",
"\n",
"    # Plotly (interactive version of the same chart)\n",
"    fig = px.bar(pop_dist_group_wp, x='Age_Bracket', y='Percentage', color='Sex', barmode='group',\n",
"                 title=f\"Population distribution in {city} by sex\",\n",
"                 labels={'Age_Bracket': 'Age Bracket', 'Percentage': 'Percentage', 'Sex': 'Sex'})\n",
"    # TODO: swap the colors\n",
"    fig.update_layout(xaxis_title=\"Age Bracket\", yaxis_title=\"Percentage\", legend_title=\"Sex\", plot_bgcolor='white')\n",
"    fig.update_xaxes(tickangle=45)\n",
"    fig.show()\n",
"    fig.write_html(render_path.replace('.png', '.html'))\n",
"\n",
"    summary = _summarize_age_sex_table(pop_dist_group_wp)\n",
"    print(f\"under5: {summary['under5']:.2%}\")\n",
"    print(f\"youth (15-24): {summary['youth']:.2%}\")\n",
"    print(f\"working_age (15-64): {summary['working_age']:.2%}\")\n",
"    print(f\"elderly (60+): {summary['elderly']:.2%}\")\n",
"    print(f\"reproductive_age, percent of women (15-50): {summary['reproductive_age']:.2%}\")\n",
"    print(f\"sex_ratio: {round(summary['sex_ratio'], 2)} males to 100 females\")\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "a83777d1-ef48-4145-a841-ba3c934b99ef",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\ncity=\\'Mumbai\\'\\ndef oxford_age_stats():\\n with open(\"global_inputs.yml\", \\'r\\') as f:\\n global_inputs = yaml.safe_load(f)\\n\\n # Get city inputs\\n city_inputs = global_inputs.get(\\'city_inputs\\')\\n \\n# Define the \\'oxford_age_stats\\' function\\ndef oxford_age_stats(city):\\n with open(\"global_inputs.yml\", \\'r\\') as f:\\n global_inputs = yaml.safe_load(f)\\n\\n # Get city inputs\\n city_inputs = global_inputs.get(\\'city_inputs\\')\\n\\n if \\'oxford\\' in menu and menu[\\'oxford\\']: \\n oxford_data_path = os.path.join(multi_scan_folder, \"Oxford Global Cities Data.csv\")\\n if os.path.exists(oxford_data_path): \\n oxford_data = pd.read_csv(oxford_data_path)\\n indicators = oxford_data[\\'Indicator\\'].drop_duplicates()\\n pop_dist_inds = [indicator for indicator in indicators if \"Population\" in indicator and indicator not in [\"Population 0-14\", \"Population 15-64\", \"Population 65+\"]] \\n\\n if city in oxford_data[\\'Location\\'].values:\\n print(f\"{city} exists in the Oxford file.\")\\n pop_dist_structure = oxford_data.loc[(oxford_data[\\'Location\\'] == city) & (oxford_data[\\'Indicator\\'].isin(pop_dist_inds))]\\n print(pop_dist_structure.head(3))\\n pop_dist_structure[\\'Age_Bracket\\'] = pop_dist_structure[\\'Indicator\\'].str[11:19]\\n # Convert to numeric, handling errors by setting them to NaN\\n pop_dist_structure[\\'Age_Bracket\\'] = pd.to_numeric(pop_dist_structure[\\'Age_Bracket\\'], errors=\\'coerce\\')\\n pop_dist_structure[\\'Group\\'] = pd.cut(\\n pop_dist_structure[\\'Age_Bracket\\'],\\n bins=[0, 4, 14, np.inf], # Replace float(\\'inf\\') with np.inf\\n labels=[\\'Young\\', \\'Working\\', \\'65+\\']\\n )\\n \\n pop_dist_structure = pop_dist_structure.groupby([\\'Year\\', \\'Group\\']).agg(Count=(\\'Value\\', \\'sum\\')).reset_index()\\n pop_dist_structure[\\'Percent\\'] = pop_dist_structure.groupby(\\'Year\\')[\\'Count\\'].transform(lambda x: x / x.sum())\\n pop_dist_structure[\\'pct_sum\\'] = 
pop_dist_structure.groupby(\\'Year\\')[\\'Percent\\'].cumsum()\\n return pop_dist_structure\\n else:\\n print(f\"{city} does not exist in the Oxford file.\")\\n else:\\n print(\"Oxford file does not exist.\")\\n else:\\n print(\"Oxford is not selected in the menu.\")\\n\\n# Example usage\\noxford_age_stats(\\'Mumbai\\')\\n'"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"#Age structure Oxford Economics (If in Oxford)\n",
"\n",
"'''\n",
"city='Mumbai'\n",
"def oxford_age_stats():\n",
" with open(\"global_inputs.yml\", 'r') as f:\n",
" global_inputs = yaml.safe_load(f)\n",
"\n",
" # Get city inputs\n",
" city_inputs = global_inputs.get('city_inputs')\n",
" \n",
"# Define the 'oxford_age_stats' function\n",
"def oxford_age_stats(city):\n",
" with open(\"global_inputs.yml\", 'r') as f:\n",
" global_inputs = yaml.safe_load(f)\n",
"\n",
" # Get city inputs\n",
" city_inputs = global_inputs.get('city_inputs')\n",
"\n",
" if 'oxford' in menu and menu['oxford']: \n",
" oxford_data_path = os.path.join(multi_scan_folder, \"Oxford Global Cities Data.csv\")\n",
" if os.path.exists(oxford_data_path): \n",
" oxford_data = pd.read_csv(oxford_data_path)\n",
" indicators = oxford_data['Indicator'].drop_duplicates()\n",
" pop_dist_inds = [indicator for indicator in indicators if \"Population\" in indicator and indicator not in [\"Population 0-14\", \"Population 15-64\", \"Population 65+\"]] \n",
"\n",
" if city in oxford_data['Location'].values:\n",
" print(f\"{city} exists in the Oxford file.\")\n",
" pop_dist_structure = oxford_data.loc[(oxford_data['Location'] == city) & (oxford_data['Indicator'].isin(pop_dist_inds))]\n",
" print(pop_dist_structure.head(3))\n",
" pop_dist_structure['Age_Bracket'] = pop_dist_structure['Indicator'].str[11:19]\n",
" # Convert to numeric, handling errors by setting them to NaN\n",
" pop_dist_structure['Age_Bracket'] = pd.to_numeric(pop_dist_structure['Age_Bracket'], errors='coerce')\n",
" pop_dist_structure['Group'] = pd.cut(\n",
" pop_dist_structure['Age_Bracket'],\n",
" bins=[0, 4, 14, np.inf], # Replace float('inf') with np.inf\n",
" labels=['Young', 'Working', '65+']\n",
" )\n",
" ''''''\n",
" pop_dist_structure = pop_dist_structure.groupby(['Year', 'Group']).agg(Count=('Value', 'sum')).reset_index()\n",
" pop_dist_structure['Percent'] = pop_dist_structure.groupby('Year')['Count'].transform(lambda x: x / x.sum())\n",
" pop_dist_structure['pct_sum'] = pop_dist_structure.groupby('Year')['Percent'].cumsum()\n",
" return pop_dist_structure\n",
" else:\n",
" print(f\"{city} does not exist in the Oxford file.\")\n",
" else:\n",
" print(\"Oxford file does not exist.\")\n",
" else:\n",
" print(\"Oxford is not selected in the menu.\")\n",
"\n",
"# Example usage\n",
"oxford_age_stats('Mumbai')\n",
"'''\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "07361797-faf5-4edc-9374-3e416c3dff56",
"metadata": {},
"outputs": [],
"source": [
"# Share of GDP, Emp, Pop (If in Oxford)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "bccb56a3-73a4-4c63-ba6a-7554098455f0",
"metadata": {},
"outputs": [],
"source": [
"# GDP, Pop, Emp Growth (If in Oxford)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "4fbd4ebe-58ff-42c7-9bb1-d7bc692ebcfe",
"metadata": {},
"outputs": [],
"source": [
"# GDP per capita (If in Oxford)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "82a89901-b37b-4440-8a70-50c288bd3700",
"metadata": {},
"outputs": [],
"source": [
"# Share of employment by sector (If in Oxford)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "976d9c30-ee1e-42ee-b3cc-881d5830a540",
"metadata": {},
"outputs": [],
"source": [
"#Economic Inequality (If in Oxford)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "7ae3cd22-889c-4859-8555-c955df2d6bfd",
"metadata": {},
"outputs": [],
"source": [
"def wsf_stats():\n",
"    \"\"\"Chart urban built-up area over time from the WSF stats CSV.\n",
"\n",
"    Runs only when menu['wsf'] is truthy. Reads '<city>_wsf_stats.csv' from\n",
"    `output_folder` (columns 'year' and 'cumulative sq km'), saves\n",
"    'urban_built_up_area.png' and '.html' in `render_folder`, and prints the\n",
"    growth between the first and the last year in the file. Returns None.\n",
"    \"\"\"\n",
"    if not menu['wsf']:\n",
"        return\n",
"    wsf_stats_file = Path(output_folder) / f\"{city}_wsf_stats.csv\"\n",
"    if not wsf_stats_file.exists():\n",
"        print(\"WSF stats file does not exist.\")\n",
"        return\n",
"\n",
"    wsf = (pd.read_csv(wsf_stats_file)\n",
"             .rename(columns={'year': 'Year', 'cumulative sq km': 'uba_km2'})\n",
"             .loc[:, ['Year', 'uba_km2']]\n",
"             .sort_values('Year')  # growth and first/latest values assume chronological order\n",
"             .reset_index(drop=True))\n",
"    if wsf.empty:\n",
"        print(\"WSF stats file contains no rows.\")\n",
"        return\n",
"\n",
"    # Change relative to the previous row (first row is NaN)\n",
"    wsf['growth_pct'] = (wsf['uba_km2'] / wsf['uba_km2'].shift(1) - 1)\n",
"    wsf['growth_km2'] = wsf['uba_km2'] - wsf['uba_km2'].shift(1)\n",
"\n",
"    os.makedirs(render_folder, exist_ok=True)\n",
"    render_path = os.path.join(render_folder, \"urban_built_up_area.png\")\n",
"\n",
"    # Static plot, saved as PNG\n",
"    static_fig, ax = plt.subplots(figsize=(8, 6))\n",
"    ax.plot(wsf['Year'], wsf['uba_km2'], marker='o', linestyle='-')\n",
"    ax.set_title(\"Urban Built-up Area, 1985-2015\")\n",
"    ax.set_xlabel(\"Year\")\n",
"    ax.set_ylabel(\"Urban built-up area (sq. km)\")\n",
"    ax.grid(True)\n",
"    static_fig.savefig(render_path)\n",
"    plt.close(static_fig)\n",
"\n",
"    # Interactive Plotly version of the same series\n",
"    fig = go.Figure()\n",
"    fig.add_trace(go.Scatter(\n",
"        x=wsf['Year'],\n",
"        y=wsf['uba_km2'],\n",
"        mode='lines+markers',\n",
"        name='Urban built-up area (km^2)'\n",
"    ))\n",
"    fig.update_layout(\n",
"        title=\"Urban Built-up Area, 1985-2015\",\n",
"        xaxis_title=\"Year\",\n",
"        yaxis_title=\"Urban built-up area (km^2)\",\n",
"        template='plotly_white',\n",
"        showlegend=True,\n",
"        hovermode='x'\n",
"    )\n",
"    fig.show()\n",
"    fig.write_html(render_path.replace('.png', '.html'))\n",
"\n",
"    first_area = wsf['uba_km2'].iloc[0]\n",
"    latest_area = wsf['uba_km2'].iloc[-1]\n",
"    first_year = wsf['Year'].iloc[0]\n",
"    latest_year = wsf['Year'].iloc[-1]\n",
"    # Percentage growth is undefined when the starting area is zero\n",
"    pct_growth = 100 * (latest_area - first_area) / first_area if first_area else float('nan')\n",
"    print(f\"The city's built-up area grew from {round(first_area, 2)} sq. km in {first_year} to {round(latest_area, 2)} in {latest_year} for {round(pct_growth, 2)}% growth\")\n"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "0b3913e5-3125-4d45-bca2-9b5558d82040",
"metadata": {},
"outputs": [],
"source": [
"#Landcover Graph\n",
"def lc_stats():\n",
"    \"\"\"Chart the land cover composition and print the three largest classes.\n",
"\n",
"    Runs only when menu['landcover'] is truthy. Reads '<city>_lc.csv' from\n",
"    `output_folder` (columns 'Land Cover Type' and 'Pixel Count'), saves\n",
"    'landcover.png' and '.html' in `render_folder`, and prints the share of the\n",
"    three most common land cover types. Returns None.\n",
"    \"\"\"\n",
"    if not menu['landcover']:\n",
"        return\n",
"    lc_stats_file = Path(output_folder) / f\"{city}_lc.csv\"\n",
"    if not lc_stats_file.exists():\n",
"        print(\"Land Cover stats file does not exist.\")\n",
"        return\n",
"\n",
"    lc = pd.read_csv(lc_stats_file)\n",
"\n",
"    # Define colors\n",
"    # TODO: connect this to the frontend layers.yml\n",
"    lc_colors = {\n",
"        \"Tree cover\": \"#397e48\",\n",
"        \"Built-up\": \"#c4281b\",\n",
"        \"Grassland\": \"#88af52\",\n",
"        \"Bare / sparse vegetation\": \"#a59b8f\",\n",
"        \"Cropland\": \"#e49634\",\n",
"        \"Water bodies\": \"#429bdf\",\n",
"        \"Permanent water bodies\": \"#00008b\",  # TODO: change this\n",
"        \"Mangroves\": \"#90EE90\",  # TODO: change this\n",
"        \"Moss and lichen\": \"#013220\",  # TODO: change this\n",
"        \"Shrubland\": \"#dfc25a\",\n",
"        \"Herbaceous wetland\": \"#7d87c4\",\n",
"        \"Snow and ice\": \"#F5F5F5\"\n",
"    }\n",
"    fallback_color = \"#808080\"  # neutral grey for any type missing from the table\n",
"\n",
"    total_pixels = lc['Pixel Count'].sum()\n",
"    lc['Percentage'] = lc['Pixel Count'] / total_pixels * 100\n",
"\n",
"    # .get avoids a KeyError when the CSV holds a type that is not in lc_colors\n",
"    pie_colors = [lc_colors.get(cover_type, fallback_color) for cover_type in lc['Land Cover Type']]\n",
"\n",
"    os.makedirs(render_folder, exist_ok=True)\n",
"    render_path = os.path.join(render_folder, \"landcover.png\")\n",
"\n",
"    static_fig, ax = plt.subplots(figsize=(8, 8))\n",
"    ax.pie(lc['Pixel Count'], labels=None, colors=pie_colors, startangle=90)\n",
"    ax.set_title(\"Land Cover Distribution\")\n",
"    static_fig.savefig(render_path)\n",
"    plt.close(static_fig)\n",
"\n",
"    fig = px.pie(lc, values='Pixel Count', names='Land Cover Type', color='Land Cover Type', color_discrete_map=lc_colors)\n",
"    fig.update_traces(textposition='inside', textinfo='percent+label')\n",
"    fig.show()\n",
"    fig.write_html(render_path.replace('.png', '.html'))\n",
"\n",
"    # Dictionary to convert integers to ordinal strings\n",
"    ordinal_dict = {1: \"first\", 2: \"second\", 3: \"third\"}\n",
"\n",
"    # Print the percentage of the first three highest values\n",
"    highest_values = lc.sort_values(by='Percentage', ascending=False).head(3)\n",
"    for rank, (_, row) in enumerate(highest_values.iterrows(), start=1):\n",
"        ordinal_str = ordinal_dict.get(rank, str(rank) + \"th\")\n",
"        print(f\"The {ordinal_str} highest landcover value is {row['Land Cover Type']} with {row['Percentage']:.2f}% of the total land area\")\n",
" \n",
" \n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "41ad0554-e5ec-4ed7-b374-b9b3a48c6428",
"metadata": {},
"outputs": [],
"source": [
"#Elevation Graph\n",
"def elev_stats():\n",
"    \"\"\"Chart the share of the city in each elevation class.\n",
"\n",
"    Runs only when menu['elevation'] is truthy. Reads '<city>_elevation.csv' from\n",
"    `output_folder` (columns 'legend' and 'percent'), saves 'elevation.png' and\n",
"    '.html' in `render_folder`, and prints the row with the largest share.\n",
"    Returns None.\n",
"    \"\"\"\n",
"    if not menu['elevation']:\n",
"        return\n",
"    elev_stats_file = Path(output_folder) / f\"{city}_elevation.csv\"\n",
"    if not elev_stats_file.exists():\n",
"        print(\"Elevation stats file does not exist.\")\n",
"        return\n",
"    elev = pd.read_csv(elev_stats_file)\n",
"    elev['percent'] = pd.to_numeric(elev['percent'], errors='coerce')\n",
"\n",
"    # Work on a copy so the column assignments below do not touch `elev`\n",
"    elevation = elev.dropna(subset=['legend']).copy()\n",
"\n",
"    # Preprocess the data\n",
"    elevation['Percent'] = elevation['percent'].apply(lambda x: f\"{x:.0%}\")\n",
"    # Lower bound of each class. A regex is used rather than split('-') so that an\n",
"    # open-ended label such as \"20+\" still yields 20 instead of being dropped as NaN.\n",
"    lower_bound = elevation['legend'].astype(str).str.extract(r\"(-?\\d+(?:\\.\\d+)?)\")[0]\n",
"    elevation['Elevation'] = pd.to_numeric(lower_bound, errors='coerce')\n",
"    elevation = elevation.dropna(subset=['Elevation'])\n",
"    if elevation.empty:\n",
"        print(\"Elevation stats file contains no usable rows.\")\n",
"        return\n",
"    elevation['legend'] = pd.Categorical(elevation['legend'], categories=elevation['legend'].unique())\n",
"    print(elevation)\n",
"\n",
"    # Define colors\n",
"    elevation_colors = {\n",
"        \"0-2\": \"#f5c4c0\",\n",
"        \"2-5\": \"#f19bb4\",\n",
"        \"5-10\": \"#ec5fa1\",\n",
"        \"10-20\": \"#c20b8a\",\n",
"        \"20+\": \"#762175\"\n",
"    }\n",
"    palette = list(elevation_colors.values())\n",
"    # Colour each wedge by its class label; labels not in the table fall back to the\n",
"    # palette colour at the same position.\n",
"    pie_colors = [elevation_colors.get(str(label), palette[position % len(palette)])\n",
"                  for position, label in enumerate(elevation['legend'])]\n",
"\n",
"    os.makedirs(render_folder, exist_ok=True)\n",
"    render_path = os.path.join(render_folder, \"elevation.png\")\n",
"\n",
"    # Create the pie chart\n",
"    static_fig, ax = plt.subplots(figsize=(8, 5))\n",
"    wedges, texts, autotexts = ax.pie(elevation['percent'], labels=None, colors=pie_colors, startangle=90, autopct='%1.0f%%')\n",
"    ax.set_title(\"Elevation Distribution\")\n",
"    ax.legend(elevation['Elevation'], loc=\"center left\", bbox_to_anchor=(1, 0, 0.5, 1))\n",
"\n",
"    # Style the percentage labels\n",
"    for autotext in autotexts:\n",
"        autotext.set_color('white')\n",
"        autotext.set_size(10)\n",
"\n",
"    # Save the plot\n",
"    static_fig.savefig(render_path, bbox_inches='tight')\n",
"    plt.close(static_fig)\n",
"\n",
"    # Interactive donut chart with percentage labels\n",
"    fig = px.pie(elevation, values='percent', names='Elevation', hole=0.3,\n",
"                 color_discrete_sequence=palette,\n",
"                 title=\"Elevation Distribution\",\n",
"                 labels={'percent': 'Percentage', 'Elevation': 'Elevation Range'},\n",
"                 template='plotly_white')\n",
"\n",
"    # Update layout\n",
"    fig.update_layout(legend=dict(orientation=\"h\", x=0.5, y=1.1),\n",
"                      margin=dict(l=0, r=0, t=50, b=0))\n",
"    fig.show()\n",
"    fig.write_html(render_path.replace('.png', '.html'))\n",
"\n",
"    # Print the elevation class that covers the largest share\n",
"    max_percent_index = elevation['percent'].idxmax()\n",
"    highest_percent_row = elevation.loc[max_percent_index]\n",
"    print(\"Highest percentage entry for Elevation:\")\n",
"    print(highest_percent_row)\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"#why are they different?"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "be08dae0-5584-42e6-b59d-9eaa5cd4d6a7",
"metadata": {},
"outputs": [],
"source": [
"#Slope Graph\n",
"def slope_stats():\n",
"    \"\"\"Chart the share of the city in each slope class.\n",
"\n",
"    Runs only when menu['elevation'] is truthy.\n",
"    NOTE(review): there is no separate slope switch; presumably slope is produced\n",
"    together with elevation - confirm.\n",
"    Reads '<city>_slope.csv' from `output_folder` (columns 'legend' and\n",
"    'percent'), saves 'slope.png' and '.html' in `render_folder`, and prints the\n",
"    row with the largest share. Returns None.\n",
"    \"\"\"\n",
"    if not menu['elevation']:\n",
"        return\n",
"    slope_stats_file = Path(output_folder) / f\"{city}_slope.csv\"\n",
"    if not slope_stats_file.exists():\n",
"        print(\"Slope stats file does not exist.\")\n",
"        return\n",
"    slope = pd.read_csv(slope_stats_file)\n",
"    slope['percent'] = pd.to_numeric(slope['percent'], errors='coerce')\n",
"\n",
"    # Drop rows with NaN values in 'legend'\n",
"    slope = slope.dropna(subset=['legend']).copy()\n",
"    if slope.empty:\n",
"        print(\"Slope stats file contains no usable rows.\")\n",
"        return\n",
"\n",
"    # Preprocess the data\n",
"    slope['Percent'] = slope['percent'].apply(lambda x: f\"{x:.0%}\")\n",
"    # First number in the label, i.e. the lower bound of the class (\"20+\" -> 20)\n",
"    slope['Slope'] = slope['legend'].astype(str).str.extract(r\"(\\d+)\")[0].astype(float)\n",
"    slope['legend'] = pd.Categorical(slope['legend'], categories=slope['legend'].unique())\n",
"\n",
"    # Define colors\n",
"    slope_colors = {\n",
"        \"0-2\": \"#ffffd4\",\n",
"        \"2-5\": \"#fed98e\",\n",
"        \"5-10\": \"#fe9929\",\n",
"        \"10-20\": \"#d95f0e\",\n",
"        \"20+\": \"#993404\"\n",
"    }\n",
"    palette = list(slope_colors.values())\n",
"    # Colour each wedge by its class label; labels not in the table fall back to the\n",
"    # palette colour at the same position.\n",
"    pie_colors = [slope_colors.get(str(label), palette[position % len(palette)])\n",
"                  for position, label in enumerate(slope['legend'])]\n",
"\n",
"    os.makedirs(render_folder, exist_ok=True)\n",
"    render_path = os.path.join(render_folder, \"slope.png\")\n",
"\n",
"    # Static pie chart\n",
"    static_fig, ax = plt.subplots(figsize=(8, 5))\n",
"    ax.pie(slope['percent'], labels=None, colors=pie_colors, startangle=90)\n",
"    ax.set_title(\"Slope Distribution\")\n",
"    ax.legend(slope['Slope'], loc=\"center left\", bbox_to_anchor=(1, 0, 0.5, 1))\n",
"    # Save the plot\n",
"    static_fig.savefig(render_path, bbox_inches='tight')\n",
"    plt.close(static_fig)\n",
"\n",
"    # Plotly (interactive donut chart)\n",
"    fig = px.pie(slope, values='percent', names='Slope', hole=0.3,\n",
"                 color_discrete_sequence=palette,\n",
"                 title=\"Slope Distribution\",\n",
"                 labels={'percent': 'Percentage', 'Slope': 'Slope Range'},\n",
"                 template='plotly_white')\n",
"\n",
"    # Update layout\n",
"    fig.update_layout(legend=dict(orientation=\"h\", x=0.5, y=1.1),\n",
"                      margin=dict(l=0, r=0, t=50, b=0))\n",
"    fig.show()\n",
"    fig.write_html(render_path.replace('.png', '.html'))\n",
"\n",
"    # Print the highest percentage value and consequent class\n",
"    max_percent_index = slope['percent'].idxmax()\n",
"    highest_percent_row = slope.loc[max_percent_index]\n",
"    print(\"Highest percentage entry for Slope:\")\n",
"    print(highest_percent_row)\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "ab2851a1-315a-445d-8982-bec89bbe1bc2",
"metadata": {},
"outputs": [],
"source": [
"#Cyclone Graph"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "505a5d75",
"metadata": {},
"outputs": [],
"source": [
"#earthquake timeline\n"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "e56dba45",
"metadata": {},
"outputs": [],
"source": [
"import yaml\n",
"import geopandas as gpd\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from shapely.geometry import Point, LineString\n",
"\n",
"def flood_timeline():\n",
"    \"\"\"Plot a timeline of archived flood events that intersect the AOI.\n",
"\n",
"    Runs only when menu['flood'] is truthy. Reads the flood archive named by\n",
"    'flood_timeline_source' in global_inputs.yml, keeps valid geometries that\n",
"    intersect the module-level `features`, maps them coloured by 'DEAD', prints\n",
"    totals of deaths, displaced people and events, and draws a labelled timeline.\n",
"    Returns None.\n",
"    \"\"\"\n",
"    with open(\"global_inputs.yml\", 'r') as f:\n",
"        global_inputs = yaml.safe_load(f)\n",
"\n",
"    if not menu['flood']:\n",
"        return\n",
"\n",
"    flood_archive_path = global_inputs.get('flood_timeline_source')\n",
"    flood_archive = gpd.read_file(flood_archive_path)\n",
"    flood_archive = flood_archive[flood_archive.is_valid]\n",
"    # Compare geometries in the archive's own CRS\n",
"    aoi = features.to_crs(flood_archive.crs)\n",
"    flood_archive = flood_archive[flood_archive.intersects(aoi.unary_union)]\n",
"    if flood_archive.empty:\n",
"        print(\"No archived flood events intersect the AOI.\")\n",
"        return\n",
"\n",
"    fig, ax = plt.subplots()\n",
"    flood_archive.plot(column='DEAD', ax=ax, legend=True)\n",
"\n",
"    # Renumber rows 0..n-1 so the index parity used below really alternates the label side\n",
"    floods = flood_archive[['BEGAN', 'ENDED', 'DEAD', 'DISPLACED', 'MAINCAUSE', 'SEVERITY']].reset_index(drop=True)\n",
"\n",
"    # Tally of flood events\n",
"    print(\"Tally of flood events\")\n",
"    print(floods.agg({'DEAD': 'sum', 'DISPLACED': 'sum', 'BEGAN': 'count'}))\n",
"\n",
"    began = pd.to_datetime(floods['BEGAN'])\n",
"    duration = (pd.to_datetime(floods['ENDED']) - began).dt.days\n",
"\n",
"    # Prepare text for plotting\n",
"    flood_text = floods.copy()\n",
"    # An explicit string default is required: np.select's built-in default is the integer 0,\n",
"    # which cannot be combined with string choices.\n",
"    flood_text['severity'] = np.select([flood_text['SEVERITY'] == 1, flood_text['SEVERITY'] == 1.5, flood_text['SEVERITY'] == 2],\n",
"                                       ['Large event', 'Very large event', 'Extreme event'],\n",
"                                       default='Unclassified event')\n",
"    flood_text['duration'] = duration\n",
"    flood_text['text'] = flood_text.apply(lambda row: f\"{row['BEGAN']}, {row['severity'].lower()} {row['MAINCAUSE']}, \"\n",
"                                                      f\"{row['duration']} days, {row['DEAD']:,} fatalities, {row['DISPLACED']:,} displaced\",\n",
"                                          axis=1)\n",
"\n",
"    # Label anchor: 1460 days after the start for odd rows, 1460 days before for even rows\n",
"    flood_text['node_x'] = began + pd.to_timedelta(1460 * (2 * (flood_text.index % 2) - 1), unit='D')\n",
"    flood_text['node_y'] = 0\n",
"\n",
"    # Prepare lines for text plotting; every x value is a datetime so they share one axis\n",
"    flood_lines = pd.concat([flood_text[['BEGAN', 'node_x', 'node_y']],\n",
"                             pd.DataFrame({'BEGAN': flood_text['BEGAN'], 'node_x': began, 'node_y': 0}),\n",
"                             pd.DataFrame({'BEGAN': flood_text['BEGAN'], 'node_x': began, 'node_y': 0})])\n",
"\n",
"    # Plotting\n",
"    fig, ax = plt.subplots(figsize=(20, 5.833))\n",
"    ax.plot(flood_lines['node_x'], flood_lines['node_y'], color='blue', linestyle='-')\n",
"    ax.scatter(flood_text['node_x'], flood_text['node_y'], color='red')\n",
"    for idx, row in flood_text.iterrows():\n",
"        ax.text(row['node_x'] + pd.to_timedelta(30, unit='D'), row['node_y'] - 50, row['text'], rotation=30, ha='center')\n",
"    ax.set_xlim(pd.Timestamp('1984-01-01'), pd.Timestamp('2020-12-31'))\n",
"    ax.set_ylim(-1800, 1800)\n",
"    # NOTE(review): these ticks lie outside the y-limits set above, so none are drawn - confirm intent\n",
"    ax.set_yticks([1825, 3650])\n",
"    ax.set_yticklabels([1825, 3650])\n",
"    plt.show()\n",
"\n",
"\n",
"\n",
"#workshop this one \n"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "5e8fe622-51c5-4159-906f-74405ebe4101",
"metadata": {},
"outputs": [],
"source": [
"def _describe_solar_seasonality(highest_value, lowest_value, threshold=2.5):\n",
"    \"\"\"Return the seasonality sentence for the best and worst monthly yields.\n",
"\n",
"    A highest/lowest ratio above `threshold` means output swings strongly between\n",
"    months (high seasonality); otherwise seasonality is low to moderate.\n",
"    \"\"\"\n",
"    ratio = float('inf') if lowest_value == 0 else highest_value / lowest_value\n",
"    if ratio > threshold:\n",
"        return \"Seasonality is high, making solar energy available in only some of the months\"\n",
"    return \"Seasonality is low to moderate, making solar energy available throughout the year\"\n",
"\n",
"\n",
"def extract_monthly_stats():\n",
"    \"\"\"Summarise monthly PV rasters over the AOI and chart solar seasonality.\n",
"\n",
"    Looks in the directory named by 'solar_graph_source' in global_inputs.yml for\n",
"    '.tif' files whose name contains 'PVOUT_<two-digit month>', clips each to the\n",
"    module-level `aoi_file` geometry and records max/min/mean/sum of the pixels\n",
"    inside it. Prints a seasonality sentence and saves '_PV_graph.png' and '.html'\n",
"    in `render_folder`. Returns None.\n",
"    \"\"\"\n",
"    with open(\"global_inputs.yml\", 'r') as f:\n",
"        global_inputs = yaml.safe_load(f)\n",
"\n",
"    pv_directory = global_inputs.get('solar_graph_source')\n",
"    month_pattern = re.compile(r'PVOUT_(\\d{2})')  # compiled once, reused for every file\n",
"    aoi = aoi_file.geometry\n",
"\n",
"    monthly_pv = []\n",
"    for filename in sorted(os.listdir(pv_directory)):\n",
"        if not filename.endswith('.tif'):\n",
"            continue\n",
"        match = month_pattern.search(filename)\n",
"        if not match:\n",
"            print(f\"No match found for filename: {filename}\")\n",
"            continue\n",
"        month = int(match.group(1))  # Extract the month\n",
"\n",
"        with rasterio.open(os.path.join(pv_directory, filename)) as src:\n",
"            # Clip in the raster's own CRS when it declares one\n",
"            shapes = aoi.to_crs(src.crs) if src.crs is not None else aoi\n",
"            # filled=False returns a masked array, so nodata and outside-AOI pixels\n",
"            # stay out of the statistics instead of entering as fill values\n",
"            raster_data, raster_transform = mask(src, shapes, crop=True, filled=False)\n",
"\n",
"        values = raster_data.compressed().astype(float)\n",
"        values = values[~np.isnan(values)]\n",
"        if values.size == 0:\n",
"            print(f\"No valid pixels inside the AOI for: {filename}\")\n",
"            continue\n",
"\n",
"        monthly_pv.append({\n",
"            'month': month,\n",
"            'max': values.max(),\n",
"            'min': values.min(),\n",
"            'mean': values.mean(),\n",
"            'sum': values.sum()\n",
"        })\n",
"\n",
"    if not monthly_pv:\n",
"        print(\"No monthly PV rasters could be summarised.\")\n",
"        return\n",
"\n",
"    # Sort by 'month'\n",
"    monthly_pv_df = pd.DataFrame(monthly_pv).sort_values(by='month')\n",
"\n",
"    # NOTE(review): the monthly mean is used directly as the daily yield - presumably\n",
"    # the rasters already hold daily values; confirm against the data source.\n",
"    monthly_pv_df['daily'] = monthly_pv_df['mean']\n",
"\n",
"    highest_value = monthly_pv_df['daily'].max()\n",
"    lowest_value = monthly_pv_df['daily'].min()\n",
"    print(_describe_solar_seasonality(highest_value, lowest_value))\n",
"\n",
"    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']\n",
"\n",
"    os.makedirs(render_folder, exist_ok=True)\n",
"    render_path = os.path.join(render_folder, \"_PV_graph.png\")\n",
"\n",
"    # Static plot; the dashed lines at 3.5 and 4.5 mark the two labelled condition bands\n",
"    static_fig, ax = plt.subplots(figsize=(10, 6))\n",
"    ax.plot(monthly_pv_df['month'], monthly_pv_df['daily'], marker='o', linestyle='-')\n",
"    ax.text(1, 4.6, 'Excellent Conditions', color='darkgrey', verticalalignment='bottom', horizontalalignment='left')\n",
"    ax.text(1, 3.6, 'Favorable Conditions', color='darkgrey', verticalalignment='bottom', horizontalalignment='left')\n",
"    ax.set_xlabel('Month')\n",
"    ax.set_ylabel('Daily PV energy yield (kWh/kWp)')\n",
"    ax.set_title('Seasonal availability of solar energy')\n",
"    ax.set_xticks(np.arange(1, 13))\n",
"    ax.set_xticklabels(month_names)\n",
"    ax.grid(True)\n",
"    ax.axhline(y=3.5, linestyle='--', color='black')\n",
"    ax.axhline(y=4.5, linestyle='--', color='black')\n",
"    static_fig.tight_layout()\n",
"    # Save the plot\n",
"    static_fig.savefig(render_path, bbox_inches='tight')\n",
"    plt.close(static_fig)\n",
"\n",
"    # Plotly (interactive version of the same chart)\n",
"    fig = px.line(monthly_pv_df, x='month', y='daily', markers=True)\n",
"    fig.add_annotation(x=1, y=4.6, text='Excellent Conditions', showarrow=False, font=dict(color='darkgrey'), xshift=5)\n",
"    fig.add_annotation(x=1, y=3.6, text='Favorable Conditions', showarrow=False, font=dict(color='darkgrey'), xshift=5)\n",
"    fig.update_xaxes(title='Month', tickvals=list(range(1, 13)), ticktext=month_names)\n",
"    fig.update_yaxes(title='Daily PV energy yield (kWh/kWp)', range=[0, 5])\n",
"    fig.add_shape(type=\"line\", x0=1, y0=3.5, x1=12, y1=3.5, line=dict(color=\"black\", width=1, dash='dash'))\n",
"    fig.add_shape(type=\"line\", x0=1, y0=4.5, x1=12, y1=4.5, line=dict(color=\"black\", width=1, dash='dash'))\n",
"    fig.update_layout(title='Seasonal availability of solar energy', xaxis=dict(showgrid=True, zeroline=False), plot_bgcolor='white')\n",
"\n",
"    fig.show()\n",
"    fig.write_html(render_path.replace('.png', '.html'))\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "5e48c01f-f509-4137-8d5b-78a7f336dbf6",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/ipshitakarmakar/mambaforge/envs/geo/share/jupyter/nbconvert/templates/base/display_priority.j2:32: UserWarning:\n",
"\n",
"Your element with mimetype(s) dict_keys(['application/vnd.plotly.v1+json']) is not able to be represented.\n",
"\n"
]
}
],
"source": [
"#Save output\n",
"def export_outputs_to_markdown(notebook_path, output_path):\n",
"    \"\"\"Convert a notebook to Markdown without its input cells and write it to disk.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    notebook_path : path of the notebook to read (parsed as nbformat version 4)\n",
"    output_path : path of the Markdown file to write (UTF-8)\n",
"    \"\"\"\n",
"    # Parse the notebook file\n",
"    with open(notebook_path, 'r', encoding='utf-8') as source_stream:\n",
"        parsed_notebook = nbformat.read(source_stream, as_version=4)\n",
"\n",
"    # Exporter configured to leave the code inputs out of the Markdown\n",
"    exporter = MarkdownExporter()\n",
"    exporter.exclude_input = True\n",
"\n",
"    markdown_text, export_resources = exporter.from_notebook_node(parsed_notebook)\n",
"\n",
"    # Drop the extracted output files from the resources; only the text is written\n",
"    for unwanted_key in ('outputs', 'output_files'):\n",
"        export_resources.pop(unwanted_key, None)\n",
"\n",
"    # Write the Markdown text\n",
"    with open(output_path, 'w', encoding='utf-8') as target_stream:\n",
"        target_stream.write(markdown_text)\n",
"\n",
"# Paths are relative to the working directory, like the YAML inputs this notebook reads,\n",
"# so the export does not depend on one user's home folder.\n",
"# Path to the input notebook\n",
"input_notebook_path = \"scan_assembly.ipynb\"\n",
"\n",
"# Path for the output Markdown file\n",
"output_markdown_path = \"output_notebook.md\"\n",
"\n",
"# Export the outputs to Markdown\n",
"export_outputs_to_markdown(input_notebook_path, output_markdown_path)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}