{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Google Suggestions\n", "\n", "This is a demo with tooltips. It provides a set of Google queries starting with \"What if %countryname% ...\". The result will be shown on the map when you hover over the country.\n", "\n", "To try queries starting with other words change the START_OF_QUERY constant. In this case, you need to specify the path to your chromedriver (PATH_TO_CHROMEDRIVER), which is used to get data. It can be downloaded [here](https://chromedriver.chromium.org/downloads)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:39:53.286565Z", "iopub.status.busy": "2024-04-17T07:39:53.286476Z", "iopub.status.idle": "2024-04-17T07:39:53.652745Z", "shell.execute_reply": "2024-04-17T07:39:53.652513Z" } }, "outputs": [], "source": [ "import os\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", "from selenium import webdriver\n", "from selenium.webdriver.common.by import By\n", "from selenium.webdriver.support.ui import WebDriverWait\n", "from selenium.webdriver.support import expected_conditions as EC\n", "from selenium.common.exceptions import WebDriverException\n", "\n", "from lets_plot import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:39:53.654276Z", "iopub.status.busy": "2024-04-17T07:39:53.654145Z", "iopub.status.idle": "2024-04-17T07:39:53.656453Z", "shell.execute_reply": "2024-04-17T07:39:53.656280Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:39:53.669520Z", "iopub.status.busy": "2024-04-17T07:39:53.669348Z", "iopub.status.idle": "2024-04-17T07:39:53.670794Z", "shell.execute_reply": "2024-04-17T07:39:53.670611Z" } }, "outputs": [], "source": [ "START_OF_QUERY = 'What if'\n", "SUGGESTION_STUB = 'no data'\n", "PATH_TO_CHROMEDRIVER = ''\n", "PATH_TO_DATA = \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/google_suggestions_2020.csv\"" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:39:53.671856Z", "iopub.status.busy": "2024-04-17T07:39:53.671740Z", "iopub.status.idle": "2024-04-17T07:39:53.673834Z", "shell.execute_reply": "2024-04-17T07:39:53.673652Z" } }, "outputs": [], "source": [ "def get_naturalearth_data(data_type=\"admin_0_countries\", columns=[\"NAME\", \"geometry\"]):\n", " import shapefile\n", " from shapely.geometry import shape\n", "\n", " naturalearth_url = \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/\" + \\\n", " \"data/naturalearth/{0}/data.shp?raw=true\".format(data_type)\n", " sf = shapefile.Reader(naturalearth_url)\n", "\n", " gdf = gpd.GeoDataFrame(\n", " [\n", " dict(zip([field[0] for field in sf.fields[1:]], record))\n", " for record in sf.records()\n", " ],\n", " geometry=[shape(s) for s in sf.shapes()]\n", " )[columns]\n", " gdf.columns = [col.lower() for col in gdf.columns]\n", "\n", " return gdf" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:39:53.674925Z", "iopub.status.busy": "2024-04-17T07:39:53.674809Z", "iopub.status.idle": "2024-04-17T07:39:53.676542Z", "shell.execute_reply": "2024-04-17T07:39:53.676345Z" } }, "outputs": [], "source": [ "def split_list_to_bunches(l, *, bunch_size=1):\n", " if bunch_size < 1:\n", " raise Exception('Wrong input')\n", " bunches = []\n", " i = 0\n", " while i < len(l):\n", " bunches.append([])\n", " for j in range(bunch_size):\n", " if i == len(l):\n", " break\n", " bunches[-1].append(l[i])\n", " i += 1\n", " return bunches" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:39:53.677592Z", "iopub.status.busy": "2024-04-17T07:39:53.677477Z", "iopub.status.idle": "2024-04-17T07:39:53.680103Z", "shell.execute_reply": "2024-04-17T07:39:53.679929Z" } }, "outputs": [], "source": [ "def get_suggestions_data(countries, *, start_of_query='', driver_path='', data_path=''):\n", " BUNCH_SIZE = 20\n", " if not os.path.isfile(driver_path):\n", " return pd.read_csv(data_path)\n", " suggestions = []\n", " for countries_bunch in split_list_to_bunches(countries, bunch_size=BUNCH_SIZE):\n", " with webdriver.Chrome(executable_path=driver_path) as driver:\n", " driver.get('http://www.google.com')\n", " for country in countries_bunch:\n", " query = '{0} {1} '.format(start_of_query, country).lower()\n", " suggestion = SUGGESTION_STUB\n", " try:\n", " input_elem = WebDriverWait(driver, 2).until(\n", " EC.presence_of_element_located((By.CSS_SELECTOR, 'input[role=\"combobox\"]'))\n", " )\n", " input_elem.send_keys(query)\n", " li_elem = WebDriverWait(driver, 1).until(\n", " EC.presence_of_element_located((By.CSS_SELECTOR, 'ul[role=\"listbox\"]>li'))\n", " )\n", " text_container_elems = driver.find_elements_by_css_selector('ul[role=\"listbox\"] div[role=\"option\"]')\n", " suggestion = next(\n", " elem.get_attribute('textContent') for elem in text_container_elems\n", " if elem.get_attribute('textContent').find(start_of_query.lower()) == 0\n", " )\n", " except WebDriverException:\n", " pass\n", " except StopIteration:\n", " pass\n", " suggestions.append(suggestion)\n", " driver.refresh()\n", " return pd.DataFrame(dict(country=countries, suggestion=suggestions))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:39:53.681417Z", "iopub.status.busy": "2024-04-17T07:39:53.681127Z", "iopub.status.idle": "2024-04-17T07:39:54.732118Z", "shell.execute_reply": "2024-04-17T07:39:54.731888Z" } }, "outputs": [], "source": [ "world_gdf = get_naturalearth_data(columns=[\"NAME\", \"ISO_A3\", \"CONTINENT\", \"POP_EST\", \"GDP_MD\", \"geometry\"])\n", "\n", "suggestions_df = get_suggestions_data(world_gdf['name'], \\\n", " start_of_query=START_OF_QUERY, \\\n", " driver_path=PATH_TO_CHROMEDRIVER, \\\n", " data_path=PATH_TO_DATA)\n", "\n", "df = suggestions_df.merge(world_gdf, left_on='country', right_on='name')\n", "gdf = gpd.GeoDataFrame(df, geometry='geometry')\n", "\n", "suggestions_gdf = gdf[~(gdf.suggestion == SUGGESTION_STUB)]\n", "no_data_gdf = gdf[gdf.suggestion == SUGGESTION_STUB]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:39:54.733597Z", "iopub.status.busy": "2024-04-17T07:39:54.733503Z", "iopub.status.idle": "2024-04-17T07:39:54.788761Z", "shell.execute_reply": "2024-04-17T07:39:54.788559Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot() + \\\n", " geom_map(data=no_data_gdf, fill='gray', size=.2, alpha=.5,\n", " tooltips=layer_tooltips().line('@name')) + \\\n", " geom_map(aes(fill='suggestion'), data=suggestions_gdf, size=.2, alpha=.5,\n", " tooltips=layer_tooltips().line('@name').line('Google suggestion|@suggestion')) + \\\n", " scale_fill_discrete(name='Google suggestion') + \\\n", " ggtitle('First Google Query Suggestion Starting with \\'%s \\'' % START_OF_QUERY) + \\\n", " ggsize(700, 500) + \\\n", " theme_void() + theme(legend_position='none')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Gray means there were no suggestions for that query." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 4 }