{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from cw.io import read_cw_data\n", "import matplotlib.pylab as plt\n", "import numpy as np\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Read and analyse the crowdwater data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total number of cw reading 34415\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/esowc31/anand/CW4Floods/cw/io.py:45: DtypeWarning: Columns (2,3,7,11,16,28,36,38,46,48) have mixed types. Specify dtype option on import or set low_memory=False.\n", " data = pd.read_csv(file_name)\n" ] } ], "source": [ "# Load the crowdwater readings through the project reader.\n", "cw_data = read_cw_data()\n", "print(f\"Total number of cw reading {len(cw_data)}\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total number of cw reading without NA 7321\n" ] } ], "source": [ "# Keep only the rows that carry a WATER_LEVEL value.\n", "cw_data = cw_data.loc[cw_data[\"WATER_LEVEL\"].notna()]\n", "print(f\"Total number of cw reading without NA {len(cw_data)}\")\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total number of cw reading without NA and false 7320\n" ] } ], "source": [ "# Discard rows whose WATER_LEVEL is the literal string \"false\".\n", "cw_data = cw_data.loc[cw_data[\"WATER_LEVEL\"].ne(\"false\")]\n", "print(f\"Total number of cw reading without NA and false {len(cw_data)}\")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "All the columns in cw_data Index(['ID', 'ROOT_ID', 'LATITUDE', 'LONGITUDE', 'CATEGORY', 'IMAGE',\n", " 'FLOW_TYPE', 'SNOW_ICE_PRESENT', 'MOISTURE', 'WATER_LEVEL', 'PP_TYPE',\n", " 'PP_RIVER_STAGNENT', 'PP_STREAM_OBSERVATION_TIME',\n", " 'PP_STREAM_PROPORTIONS', 'PP_SHORE_PLOTSIZE', 'PP_AMOUNT',\n", " 'WL_ADVANCED', 'WL_WIDTH', 
'WL_DEPTH', 'STREAMTYPE_TYPE',\n", " 'STREAMTYPE_DRINK_WATER', 'STREAMTYPE_SWIMMING', 'STREAMTYPE_BUILTIN',\n", " 'WL_MATERIAL', 'STREAMTYPE_WATERCOLOR', 'STREAMTYPE_GROUNDVISIBLE',\n", " 'STREAMTYPE_ANIMALS', 'STREAMTYPE_POLLUTION', 'STREAMTYPE_DRIESUP',\n", " 'STREAMTYPE_NAME', 'WL_METHOD', 'WL_FLOW_VELOCITY', 'WL_DISTANCE',\n", " 'WL_TIME_A', 'WL_TIME_B', 'WL_TIME_C', 'WL_DISTANCE_B', 'WL_DISTANCE_C',\n", " 'PP_ADVANCED', 'PP_ADV_PET', 'PP_ADV_POSOFT', 'PP_ADV_POHARD',\n", " 'PP_ADV_PS', 'PP_ADV_PSE', 'PP_ADV_MULTILAYER', 'PP_ADV_OTHER',\n", " 'PP_PLASTIC_REMOVED_CHECK', 'PHYSICAL_SCALE_UNIT',\n", " 'PHYSICAL_SCALE_LEVEL', 'DESCRIPTION', 'SPOTTED_AT'],\n", " dtype='object')\n" ] } ], "source": [ "# List the fields available on each reading.\n", "print(f\"All the columns in cw_data {cw_data.columns}\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "They are atleast 50 stations with 20 or more readings, \n", " this includes the stations on entire globe and not only to europe\n" ] } ], "source": [ "# Count the readings per station (ROOT_ID) and rank the stations from most to fewest readings.\n", "station_ids, counts = np.unique(cw_data.ROOT_ID, return_counts=True)\n", "\n", "# np.unique returns the ids in ascending order, so a stable ascending sort on the counts,\n", "# reversed, yields counts descending with ties broken by id descending.\n", "order = np.argsort(counts, kind=\"stable\")[::-1]\n", "freq = list(counts[order])\n", "station_id = list(station_ids[order])\n", "\n", "# Left panel: readings per station over the whole ranking.\n", "# Right panel: zoom on ranks 20-99 with a reference line at 20 readings.\n", "ZOOM_START, ZOOM_STOP = 20, 100\n", "MIN_READINGS = 20\n", "# The slice is shorter than 80 entries when there are fewer than 100 stations,\n", "# so the x values are sized from the slice instead of being hard-coded.\n", "zoomed = freq[ZOOM_START:ZOOM_STOP]\n", "\n", "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))\n", "ax1.plot(freq)\n", "ax2.plot(np.arange(ZOOM_START, ZOOM_START + len(zoomed)), zoomed)\n", "ax2.axhline(MIN_READINGS)\n", "ax2.grid()\n", "for panel in (ax1, ax2):\n", "    panel.set_xlabel(\"Station rank\")\n", "    panel.set_ylabel(\"Number of readings\")\n", "plt.show()\n", "\n", "# Hand-written observation read off the plot above; it is not computed from the data.\n", "print(\"They are atleast 50 stations with 20 or more readings, \\n this includes the stations on entire globe and not only to europe\")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Most of the values are in the range -1 to +1\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Distribution of the reported WATER_LEVEL values.\n", "# NOTE(review): WATER_LEVEL held at least one string (\"false\") before filtering, so the\n", "# column may still be object-typed here - confirm the values are numeric before binning.\n", "fig, ax = plt.subplots(1, 1, figsize=(10, 4))\n", "ax.hist(cw_data.WATER_LEVEL, bins=13)\n", "ax.set_xlabel(\"WATER_LEVEL\")\n", "ax.set_ylabel(\"Number of readings\")\n", "\n", "# Hand-written observation read off the histogram; it is not computed from the data.\n", "print(\"Most of the values are in the range -1 to +1\")" ] } ], "metadata": { "interpreter": { "hash": "692f5383e91b71d62ecfc91266ec3ccdde3d070cc847d400f4b3a8a0583f7858" }, "kernelspec": { "display_name": "Python 3.9.0 ('CW4F')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }