{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "\n",
    "This script scrapes the Twitter accounts that my favourite funny Twitter account (@AbbieEvansXO) follows.\n",
    "This is my hack to get a mostly correct list of funny accounts. (Otherwise, I'd have had to find them individually and then create a list - maybe later.)\n",
    "We will later scrape the tweets from this 'following' list of Twitter accounts.\n",
    "\n",
    "'''\n",
    "\n",
    "\n",
    "#Import required libraries\n",
    "import os\n",
    "import time\n",
    "from getpass import getpass\n",
    "\n",
    "import requests\n",
    "import pandas as pd\n",
    "from bs4 import BeautifulSoup\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.keys import Keys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nelem = browser.find_element_by_tag_name(\"body\")\\n\\nno_of_pagedowns = 20\\n\\nwhile no_of_pagedowns:\\n    elem.send_keys(Keys.PAGE_DOWN)\\n    time.sleep(0.2)\\n    no_of_pagedowns-=1\\n'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Base funny Twitter account; the other funny accounts she follows are scraped first.\n",
    "twitter_username = \"AbbieEvansXO\"\n",
    "\n",
    "# Start Chrome through the 'chromedriver.exe' executable, then pause for a second.\n",
    "chromedriver_path = 'chromedriver.exe'\n",
    "driver = webdriver.Chrome(chromedriver_path)\n",
    "time.sleep(1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Log in, then open the list of accounts that the base account follows.\n",
    "# Credentials are read from the TWITTER_EMAIL / TWITTER_PASSWORD environment\n",
    "# variables (prompting when unset) so they are never stored in the notebook.\n",
    "login_email = os.environ.get(\"TWITTER_EMAIL\") or input(\"Twitter email: \")\n",
    "login_password = os.environ.get(\"TWITTER_PASSWORD\") or getpass(\"Twitter password: \")\n",
    "\n",
    "driver.get(\"https://www.twitter.com/login\")\n",
    "\n",
    "email_field = driver.find_element_by_css_selector(\".js-initial-focus\")\n",
    "email_field.clear()\n",
    "email_field.send_keys(login_email)\n",
    "\n",
    "password_field = driver.find_element_by_css_selector(\".js-password-field\")\n",
    "password_field.clear()\n",
    "password_field.send_keys(login_password)\n",
    "\n",
    "# Submit the form with Enter and give the login a moment to complete.\n",
    "password_field.send_keys(Keys.RETURN)\n",
    "time.sleep(2)\n",
    "\n",
    "driver.get(\"https://twitter.com/\" + twitter_username + \"/following\")\n",
    "\n",
    "# The page is loaded dynamically: jump to the bottom nine times,\n",
    "# pausing after each jump so the next batch of accounts can load.\n",
    "for _ in range(9):\n",
    "    driver.execute_script(\"window.scrollTo(0, document.body.scrollHeight);\")\n",
    "    time.sleep(2)\n",
    "\n",
    "# Parse the rendered page with BeautifulSoup.\n",
    "soup = BeautifulSoup(driver.page_source, \"lxml\")\n",
    "\n",
    "# Handles come from the <b> tags, display names from the profile-name links.\n",
    "username = [tag.text for tag in soup.find_all(\"b\", class_=\"u-linkComplex-target\")]\n",
    "userprofile = [\n",
    "    tag.text.strip()\n",
    "    for tag in soup.find_all(\"a\", class_=\"fullname ProfileNameTruncated-link u-textInheritColor js-nav\")\n",
    "]\n",
    "\n",
    "# Since I follow her myself, my own account shows up first in the list, so drop it.\n",
    "# NOTE(review): two handles but only one display name are dropped - presumably an\n",
    "# extra handle precedes the list on the page; confirm before changing the counts.\n",
    "username.pop(0)\n",
    "username.pop(0)\n",
    "userprofile.pop(0)\n",
    "\n",
    "# Build the data frame and sort it case-insensitively by username.\n",
    "# (The original referenced an undefined `followers` frame here, raising NameError.)\n",
    "following = pd.DataFrame(list(zip(userprofile, username)), columns=[\"Profile\", \"Username\"])\n",
    "following[\"username_upper\"] = following[\"Username\"].str.upper()\n",
    "following = following.sort_values([\"username_upper\"])\n",
    "del following[\"username_upper\"]\n",
    "following.head()\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the scraped accounts to a CSV file; these accounts will be scraped in the next script.\n",
    "output_csv = \"Following list.csv\"\n",
    "following.to_csv(output_csv)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}