{ "cells": [
  { "cell_type": "markdown", "metadata": {}, "source": [
    "## Using the Google Books REST API \n",
    "\n",
    "#### API documentation:\n",
    "\n",
    "https://developers.google.com/books/docs/v1/getting_started#REST\n",
    "\n",
    "https://developers.google.com/books/docs/v1/using#query-params"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Querying the Google Books API with an ISBN and loading the response\n",
    "\n",
    "from urllib.request import urlopen\n",
    "import json\n",
    "\n",
    "url = 'https://www.googleapis.com/books/v1/volumes?q=isbn:162779073X'\n",
    "\n",
    "# The 'with' block closes the HTTP connection once the response body has been read\n",
    "with urlopen(url) as response:\n",
    "    json_text = response.read().decode('utf8')\n",
    "\n",
    "print(json_text)"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Parsing the JSON text data to create a JSON object\n",
    "\n",
    "json_object = json.loads(json_text)\n",
    "\n",
    "json_object"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Viewing the top-level keys in the JSON object\n",
    "\n",
    "for key in json_object:\n",
    "    print(key)"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Viewing the list of 'items', which contains metadata for a single book \n",
    "\n",
    "json_object['items']"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Viewing keys included in the metadata entry for a single book\n",
    "\n",
    "for key in json_object['items'][0]:\n",
    "    print(key)"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Viewing keys included in a single book's 'volumeInfo'\n",
    "\n",
    "for key in json_object['items'][0]['volumeInfo']:\n",
    "    print(key)"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Viewing the contents of 'volumeInfo' for a single book\n",
    "\n",
    "json_object['items'][0]['volumeInfo']"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Assigning the contents of 'volumeInfo' to the variable 'volume_info'\n",
    "\n",
    "item = json_object['items'][0]\n",
    "\n",
    "volume_info = item['volumeInfo']\n",
    "\n",
    "volume_info"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Accessing the contents of several metadata fields\n",
    "\n",
    "volume_info['title']"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "volume_info['publisher']"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "volume_info['publishedDate']"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "volume_info['description']"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Note that the 'authors' field is formatted as a list\n",
    "\n",
    "volume_info['authors']"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Reformatting the 'authors' list as a pipe-separated string\n",
    "\n",
    "'|'.join(volume_info['authors'])"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Combining several fields to create an example 'row'\n",
    "\n",
    "title = volume_info['title']\n",
    "authors = '|'.join(volume_info['authors'])\n",
    "published_date = volume_info['publishedDate']\n",
    "publisher = volume_info['publisher']\n",
    "description = volume_info['description']\n",
    "\n",
    "row = [title, authors, published_date, publisher, description]\n",
    "\n",
    "row"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "## Using str.format() to construct API query URLs"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "\"This is a very {} sentence.\".format('informative')"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "adjectives = ['informative', 'short', 'true', 'beautiful', 'misleading']\n",
    "\n",
    "for adjective in adjectives:\n",
    "    sentence = \"This is a very {} sentence.\".format(adjective)\n",
    "    print(sentence)"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "adverbs = ['very', 'fairly', 'definitively', 'kinda']\n",
    "\n",
    "adjectives = ['informative', 'short', 'true', 'beautiful', 'misleading']\n",
    "\n",
    "# Nested loops: one sentence is printed for every adverb/adjective pair\n",
    "for adverb in adverbs:\n",
    "    for adjective in adjectives:\n",
    "        sentence = \"This is a {} {} sentence.\".format(adverb, adjective)\n",
    "        print(sentence)"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Generating a list of API query URLs\n",
    "\n",
    "isbns = ['1-6277-9073-X', '0-8050-9205-6', '0-307-40802-7', '0-307-35419-9', '0-307-35428-8', '0-9779440-0-X']\n",
    "\n",
    "for isbn in isbns:\n",
    "    sanitized_isbn = isbn.replace('-', '')  # removing hyphens from each ISBN\n",
    "    url = 'https://www.googleapis.com/books/v1/volumes?q=isbn:{}'.format(sanitized_isbn)\n",
    "    print(url)"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "## *Assignment:*\n",
    "\n",
    "1. Choose an author and create a list of their books' ISBNs. (Hint: Use Wikipedia.)\n",
    "\n",
    "2. Query the Google Books API for each ISBN in the list.\n",
    "\n",
    "3. Extract several metadata fields and add them to a list of lists.\n",
    "\n",
    "4. Write the list of lists to a CSV file." 
] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "## Google Books Free Text Query"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "from urllib.request import urlopen\n",
    "import json\n",
    "\n",
    "url = 'https://www.googleapis.com/books/v1/volumes?q=Glenn+Greenwald&maxResults=40'\n",
    "\n",
    "# The 'with' block closes the HTTP connection once the response body has been read\n",
    "with urlopen(url) as response:\n",
    "    json_text = response.read().decode('utf8')\n",
    "\n",
    "json_object = json.loads(json_text)\n",
    "\n",
    "json_object"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "len(json_object['items'])"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "for item in json_object['items']:\n",
    "    print(item['volumeInfo']['title'])"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "## Note: this loop stops with a KeyError if a book has no 'authors' field;\n",
    "## the next cell handles that case\n",
    "\n",
    "for item in json_object['items']:\n",
    "    volume_info = item['volumeInfo']\n",
    "    print('|'.join(volume_info['authors']))"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "for item in json_object['items']:\n",
    "    volume_info = item['volumeInfo']\n",
    "    try:\n",
    "        print('|'.join(volume_info['authors']))\n",
    "    except KeyError:  # only a missing 'authors' field is expected; other errors should surface\n",
    "        print('** No Author **')"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "## Google Books 
Author Query"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "from urllib.parse import urlencode\n",
    "from urllib.request import urlopen\n",
    "import json\n",
    "\n",
    "# urlencode() percent-encodes the double quotes and the space in the author phrase,\n",
    "# so the query string contains only characters that are valid in a URL\n",
    "query = urlencode([('q', 'inauthor:\"Glenn Greenwald\"'), ('maxResults', 40)])\n",
    "\n",
    "url = 'https://www.googleapis.com/books/v1/volumes?' + query\n",
    "\n",
    "# The 'with' block closes the HTTP connection once the response body has been read\n",
    "with urlopen(url) as response:\n",
    "    json_text = response.read().decode('utf8')\n",
    "\n",
    "json_object = json.loads(json_text)\n",
    "\n",
    "json_object"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "for item in json_object['items']:\n",
    "    volume_info = item['volumeInfo']\n",
    "    print(volume_info['title'])\n",
    "    try:\n",
    "        print('|'.join(volume_info['authors']))\n",
    "    except KeyError:  # no 'authors' field for this book: print an empty line in its place\n",
    "        print('')\n",
    "    print()"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "## *Assignment:*\n",
    "\n",
    "1. Create a list of authors.\n",
    "\n",
    "2. Query the Google Books API for each author in the list.\n",
    "\n",
    "3. Extract several metadata fields for each book and add them all to a single list of lists.\n",
    "\n",
    "4. Write the list of lists to a CSV file." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }