{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Detective and mystery stories from corpus-db\n",
    "\n",
    "Download the metadata for one subject heading from corpus-db, keep the English-language records, fetch the full text of the first ten, and save them to `detectives.json`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root of the corpus-db service; the API URLs in this notebook are built from it.\n",
    "baseURL = \"http://corpus-db.org\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getMeta(subject, timeout=30):\n",
    "    \"\"\"Fetch the corpus-db metadata records filed under one subject heading.\n",
    "\n",
    "    Args:\n",
    "        subject: Subject heading, e.g. 'Detective and mystery stories'.\n",
    "        timeout: Seconds requests waits on the server before giving up.\n",
    "\n",
    "    Returns:\n",
    "        The decoded JSON body of the /api/subject/ response.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: if the server replies with a 4xx/5xx status.\n",
    "        requests.Timeout: if no response arrives within `timeout` seconds.\n",
    "    \"\"\"\n",
    "    metaResponse = requests.get(baseURL + \"/api/subject/\" + subject, timeout=timeout)\n",
    "    # Surface HTTP failures here rather than as a confusing JSON decode error.\n",
    "    metaResponse.raise_for_status()\n",
    "    return metaResponse.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "detectiveMeta = getMeta('Detective and mystery stories')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "586"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only records whose 'languages' field equals the literal string \"['en']\".\n",
    "englishDetectives = [item for item in detectiveMeta if item['languages'] == \"['en']\"]\n",
    "len(englishDetectives)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Doyle, Arthur Conan The Return of Sherlock Holmes 108.0\n",
      "Collins, Wilkie The Haunted Hotel: A Mystery of Modern Venice 170.0\n",
      "Rohmer, Sax The Insidious Dr. Fu Manchu 173.0\n",
      "Chesterton, G. K. (Gilbert Keith) The Innocence of Father Brown 204.0\n",
      "Doyle, Arthur Conan The Return of Sherlock Holmes 221.0\n",
      "Chesterton, G. K. 
(Gilbert Keith) The Wisdom of Father Brown 223.0\n",
      "Doyle, Arthur Conan A Study in Scarlet 244.0\n",
      "Gaboriau, Emile The Count's Millions 305.0\n",
      "Rinehart, Mary Roberts Where There's a Will 330.0\n",
      "Michelson, Miriam In the Bishop's Carriage 481.0\n"
     ]
    }
   ],
   "source": [
    "for item in englishDetectives[:10]:\n",
    "    print(item['author'], item['title'], item['id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getText(idStr, timeout=60):\n",
    "    \"\"\"Download the full text of one corpus-db book.\n",
    "\n",
    "    Args:\n",
    "        idStr: Book id, as found in the 'id' field of a metadata record.\n",
    "        timeout: Seconds requests waits on the server before giving up.\n",
    "\n",
    "    Returns:\n",
    "        The 'text' field of the first record in the /fulltext response.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: if the server replies with a 4xx/5xx status.\n",
    "        requests.Timeout: if no response arrives within `timeout` seconds.\n",
    "        IndexError: if the response contains no records for this id.\n",
    "    \"\"\"\n",
    "    # str() keeps the URL concatenation working if a caller passes a numeric id.\n",
    "    bookId = str(idStr)\n",
    "    response = requests.get(baseURL + '/api/id/' + bookId + '/fulltext', timeout=timeout)\n",
    "    # Surface HTTP failures here rather than as a confusing JSON decode error.\n",
    "    response.raise_for_status()\n",
    "    records = response.json()\n",
    "    if not records:\n",
    "        # Same exception type as indexing an empty list, but with context.\n",
    "        raise IndexError('no full text returned for id ' + bookId)\n",
    "    return records[0]['text']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy each record so that attaching the full text below does not also\n",
    "# modify the dicts held in englishDetectives.\n",
    "tenEnglishDetectives = [dict(item) for item in englishDetectives[:10]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One HTTP request per book; each record gains a 'text' key.\n",
    "for item in tenEnglishDetectives:\n",
    "    item['text'] = getText(item['id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\n\\n\\n\\n\\n\\n\\n\\nTHE RETURN OF SHERLOCK HOLMES,\\n\\nA Collection of Holmes Adventures\\n\\n\\nby Sir Arthur Conan Doyle\\n\\n\\n\\n\\nCONTENTS:\\n\\n The Adventure Of The Empty House\\n\\n The Adventure Of The Norwood Builder\\n\\n '"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tenEnglishDetectives[0]['text'][:200]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(tenEnglishDetectives)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4605130"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The with-block closes the file even if serialisation fails; the cell still\n",
    "# ends on the character count returned by write().\n",
    "with open('detectives.json', 'w') as outFile:\n",
    "    charsWritten = outFile.write(json.dumps(tenEnglishDetectives))\n",
    "charsWritten"
   ]
  },
  {
   "cell_type": "code",
"execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2" } }, "nbformat": 4, "nbformat_minor": 2 }