{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Reddit thumbnail browser\n",
    "\n",
    "This notebook combines everything together: it searches for subreddits related to `search_term` (up to `sr_limit` of them), requests up to `posts_per_subreddit` posts from each one, and shows the thumbnail of every post that has one, linked to the post URL.\n",
    "\n",
    "The Reddit password and the app secret are read from the `reddit_pass` and `reddit_secret` environment variables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import html\n",
    "import os\n",
    "\n",
    "import requests\n",
    "from IPython.display import HTML, display"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Change these values\n",
    "search_term = 'puppies'\n",
    "sr_limit = 10  # maximum number of subreddits to request from the search\n",
    "posts_per_subreddit = 5  # maximum number of posts to request from each subreddit\n",
    "display_here = True  # render the result inside the notebook\n",
    "save_as_html = True  # write the result to '<search_term>.html'\n",
    "post_filter = 'top' # You can use 'top', 'hot', 'new', 'rising', 'random'\n",
    "period = 'month' # You can use 'hour', 'day', 'week', 'month', 'year', 'all' (as a top filter)\n",
    "request_timeout = 30  # seconds to wait for each HTTP request before giving up"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# User-specific settings\n",
    "reddit_username = 'alpscode'\n",
    "reddit_password = os.environ['reddit_pass']\n",
    "app_id = 'QC-6FYIEH_kqUw'\n",
    "app_secret = os.environ['reddit_secret']\n",
    "reddit_header = {'user-agent': 'parser by {}'.format(reddit_username)}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Authenticate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Exchange the account credentials for a bearer token used by every later request\n",
    "base_url = 'https://www.reddit.com/'\n",
    "data = {'grant_type': 'password', 'username': reddit_username, 'password': reddit_password}\n",
    "client_auth = requests.auth.HTTPBasicAuth(app_id, app_secret)\n",
    "response = requests.post(base_url + 'api/v1/access_token',\n",
    "                         data=data,\n",
    "                         headers=reddit_header,\n",
    "                         auth=client_auth,\n",
    "                         timeout=request_timeout)\n",
    "response.raise_for_status()\n",
    "values = response.json()\n",
    "if 'access_token' not in values:\n",
    "    # presumably rejected credentials are reported in the JSON body -- verify against the Reddit API docs\n",
    "    raise RuntimeError('No access token in response: {}'.format(values.get('error', 'unknown error')))\n",
    "api_url = 'https://oauth.reddit.com'\n",
    "token = 'bearer {}'.format(values['access_token'])\n",
    "headers = {'Authorization': token, 'User-Agent': 'parser by {}'.format(reddit_username)}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Find related subreddits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parsing following subreddits\n",
      "['puppies', 'aww', 'dogpictures', 'corgi', 'lookatmydog', 'pics', 'dogs', 'Eyebleach', 'goldenretrievers', 'husky']\n"
     ]
    }
   ],
   "source": [
    "payload = {'q': search_term, 'limit': sr_limit}\n",
    "response = requests.get(api_url + '/subreddits/search', headers=headers, params=payload,\n",
    "                        timeout=request_timeout)\n",
    "response.raise_for_status()\n",
    "js = response.json()\n",
    "\n",
    "# Keep only the display name of every subreddit in the search result\n",
    "sr = [child['data']['display_name'] for child in js['data']['children']]\n",
    "\n",
    "print('Parsing following subreddits')\n",
    "print(sr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Collect thumbnails"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "payload = {'t': period, 'limit': posts_per_subreddit}\n",
    "# 'thumbnail' values that are placeholders rather than image URLs\n",
    "no_thumbnail = {'', 'self', 'nsfw', 'spoiler', 'default'}\n",
    "\n",
    "html_parts = []\n",
    "for s in sr:\n",
    "    r = requests.get(api_url + '/r/{}/{}'.format(s, post_filter), headers=headers, params=payload,\n",
    "                     timeout=request_timeout)\n",
    "    if not r.ok:\n",
    "        # One inaccessible subreddit should not abort the whole run\n",
    "        print('Skipping r/{} (HTTP {})'.format(s, r.status_code))\n",
    "        continue\n",
    "    js = r.json()\n",
    "    # NOTE(review): the 'random' filter appears to answer with a list of listings\n",
    "    # (post first) instead of a single listing -- confirm against the Reddit API\n",
    "    listing = js[0] if isinstance(js, list) else js\n",
    "\n",
    "    html_parts.append('<div><h2>{}</h2>'.format(html.escape(s)))\n",
    "    for child in listing['data']['children']:\n",
    "        post = child['data']\n",
    "        thumbnail = post.get('thumbnail') or ''\n",
    "        if thumbnail in no_thumbnail:\n",
    "            continue\n",
    "        # Post fields come from Reddit users, so escape them before building markup\n",
    "        html_parts.append('<a href=\"{}\"><img src=\"{}\" title=\"{}\"/></a>'.format(\n",
    "            html.escape(post['url']),\n",
    "            html.escape(thumbnail),\n",
    "            html.escape(post['title']),\n",
    "        ))\n",
    "    html_parts.append('</div>')\n",
    "\n",
    "imghtml = ''.join(html_parts)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Display and save"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "if display_here:\n",
    "    display(HTML(imghtml))\n",
    "if save_as_html:\n",
    "    # The page is written next to the notebook and named after the search term\n",
    "    with open(\"{}.html\".format(search_term), \"w\", encoding='utf-8') as html_page:\n",
    "        html_page.write(imghtml)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}