{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "17446059",
   "metadata": {},
   "outputs": [],
   "source": [
    "# make sure you have done: pip install selenium beautifulsoup4\n",
    "import os\n",
    "\n",
    "from bs4 import BeautifulSoup\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.chrome.service import Service\n",
    "from selenium.webdriver.common.by import By\n",
    "\n",
    "# Location of the chromedriver binary. Set the CHROMEDRIVER_PATH environment\n",
    "# variable to use your own copy; otherwise the tutorial's original path is used.\n",
    "CHROMEDRIVER_PATH = os.environ.get('CHROMEDRIVER_PATH', '/Users/mckayjohns/Downloads/chromedriver')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad942456",
   "metadata": {},
   "outputs": [],
   "source": [
    "# create the driver by passing the path of the chromedriver in through a Service object\n",
    "# (handing the path straight to webdriver.Chrome is deprecated in Selenium 4)\n",
    "driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c96a2c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# we can also add options\n",
    "# List of all options available here https://peter.sh/experiments/chromium-command-line-switches/\n",
    "options = webdriver.ChromeOptions()\n",
    "options.add_argument(\"--headless\")  # runs the browser without a UI\n",
    "\n",
    "# if the browser from the previous cell is still open, close it so it is not left running\n",
    "try:\n",
    "    driver.quit()\n",
    "except NameError:  # the previous cell was skipped, nothing to close\n",
    "    pass\n",
    "\n",
    "# create driver with the options\n",
    "driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH), options=options)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "34994207",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Go to a webpage\n",
    "driver.get('https://www.serebii.net/pokemon/gen1pokemon.shtml')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "24617197",
   "metadata": {},
   "outputs": [],
   "source": [
    "# First we'll get all of the pokemon.\n",
    "# We will use CSS selectors to do this. You can explore the page structure in the\n",
    "# Chrome dev tools (CMD + Option + i on Mac, or right click -> Inspect);\n",
    "# understanding a little bit of CSS and HTML will help with this process.\n",
    "\n",
    "# Parse the HTML that the browser has rendered\n",
    "page_source = BeautifulSoup(driver.page_source, 'html.parser')\n",
    "\n",
    "# We will get the table element\n",
    "pokemon_table = page_source.select_one('table[class=\"dextable\"]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "ea2ab864",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Now let's get all of the individual table rows\n",
    "pokemon = page_source.select('table[class=\"dextable\"] tbody tr')"
   ]
  },
  { "cell_type": "code", "execution_count": 32, "id": "c881f81e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "