{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b7e1d0a1",
   "metadata": {},
   "source": [
    "# Selenium basics\n",
    "\n",
    "A short walkthrough of driving Chrome with Selenium: start a driver, scrape a table with BeautifulSoup, then fill in and submit a form.\n",
    "\n",
    "The notebook is meant to be run top to bottom (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "17446059",
   "metadata": {},
   "outputs": [],
   "source": [
    "# make sure you have done pip install selenium beautifulsoup4\n",
    "import os\n",
    "\n",
    "from bs4 import BeautifulSoup\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.chrome.service import Service\n",
    "from selenium.webdriver.common.by import By"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad942456",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path of the chromedriver binary.\n",
    "# Set the CHROMEDRIVER_PATH environment variable to override the default\n",
    "# instead of editing the notebook.\n",
    "CHROMEDRIVER_PATH = os.environ.get('CHROMEDRIVER_PATH', '/Users/mckayjohns/Downloads/chromedriver')\n",
    "\n",
    "# True runs the browser without a UI; set to False to watch the browser window.\n",
    "HEADLESS = True"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c2d4e6f8",
   "metadata": {},
   "source": [
    "## Create the driver"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c96a2c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# we can add options to the browser\n",
    "# List of all options available here https://peter.sh/experiments/chromium-command-line-switches/\n",
    "options = webdriver.ChromeOptions()\n",
    "if HEADLESS:\n",
    "    options.add_argument(\"--headless\")  # runs the browser without a UI\n",
    "\n",
    "# create the driver by passing the path of the chromedriver in a Service object\n",
    "# (passing the path straight to webdriver.Chrome is deprecated)\n",
    "# Only one driver is created so that no browser is left running unreferenced.\n",
    "service = Service(executable_path=CHROMEDRIVER_PATH)\n",
    "driver = webdriver.Chrome(service=service, options=options)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d3f5a7b9",
   "metadata": {},
   "source": [
    "## Scrape a page"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34994207",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Go to a webpage\n",
    "driver.get('https://www.serebii.net/pokemon/gen1pokemon.shtml')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24617197",
   "metadata": {},
   "outputs": [],
   "source": [
    "# First we'll get all of the pokemon\n",
    "# We will use css selectors to do this\n",
    "# Open the chrome dev tools by hitting CMD + Option + i on Mac, or right click and choose inspect\n",
    "# Understanding a little bit of css and html will help this process\n",
    "\n",
    "# Parse the HTML of the page the driver currently has loaded\n",
    "page_source = BeautifulSoup(driver.page_source, 'html.parser')\n",
    "\n",
    "# We will get the table element\n",
    "pokemon_table = page_source.select_one('table[class=\"dextable\"]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea2ab864",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Now let's get all of the individual table rows\n",
    "pokemon = page_source.select('table[class=\"dextable\"] tbody tr')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c881f81e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's look at the first pokemon; it is at index 2 because the first two rows are the table headers\n",
    "# (when this notebook was last run the row showed: #001, Bulbasaur, Overgrow, Chlorophyll, 45, 49, 49, 65, 65, 45)\n",
    "pokemon[2]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e4a6b8c0",
   "metadata": {},
   "source": [
    "## Interact with a page"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "094283eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's learn now how to start interacting with the page\n",
    "driver.get('https://www.random.org/integers/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a00201e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# text actions\n",
    "# instead of using beautiful soup we will just use the selenium defaults to find the elements\n",
    "num_numbers = driver.find_element(By.CSS_SELECTOR, 'input[name=\"num\"]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ac7d4b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get rid of current text\n",
    "num_numbers.clear()\n",
    "\n",
    "# insert new text\n",
    "num_numbers.send_keys('5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92a8c71e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's click on the button\n",
    "submit_button = driver.find_element(By.CSS_SELECTOR, 'input[value=\"Get Numbers\"]')\n",
    "\n",
    "submit_button.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fae9c855",
   "metadata": {},
   "outputs": [],
   "source": [
    "# quit() ends the whole browser session and stops the chromedriver process;\n",
    "# close() would only close the current window.\n",
    "driver.quit()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}