{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Working with HTML" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "import urllib.request\n", "\n", "# Project Gutenberg HTML edition of Alice's Adventures in Wonderland.\n", "aliceUrl = \"http://www.gutenberg.org/files/11/11-h/11-h.htm\"\n", "\n", "# Read the whole page inside a with-block so the HTTP response is closed\n", "# as soon as the body has been read, instead of being left open.\n", "with urllib.request.urlopen(aliceUrl) as aliceResponse:\n", "    # NOTE: despite the name, this is raw bytes (not str); nothing is decoded here.\n", "    aliceString = aliceResponse.read()" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "b'\\r\\n\\r\\n\\r\\n\\r\\n\\r\\n \\r\\n\\r\\n'\n" ] } ], "source": [ "print(aliceString[:200],\" ... \",aliceString[-200:]) # have a peek" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "\n", "\r\n", " Alice's Adventures in Wonderland, by Lewis Carroll\r\n", " \n", "\r\n", "