{ "cells": [ { "cell_type": "markdown", "id": "stopped-government", "metadata": {}, "source": [ "---\n", "## Let's now use JSON to access **the NARA Catalog Morgenthau Diary**, Vol. 696 (Jan. 22 - Jan. 26, 1944):\n", "### - We use an API call that specifies a NARA fileUnit ID directly: 28277406\n", "\n", "\n", "| NARA Catalog Hierarchy | JSON Structure for the Query of FDR FileUnit 28277406: \"The Morgenthau Diaries\" |\n", "| ----- | ----- |\n", "| | |" ] },
{ "cell_type": "code", "execution_count": 22, "id": "liberal-tracker", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "{'_index': 'nac-records2',\n", " '_type': '_doc',\n", " '_id': '28277406',\n", " '_score': 28.70905,\n", " '_source': {'record': {'generalRecordsTypes': ['Textual Records'],\n", " 'levelOfDescription': 'fileUnit',\n", " 'recordType': 'description',\n", " 'ancestors': [{'collectionIdentifier': 'FDR-MORGEN',\n", " 'inclusiveStartDate': {'year': 1866, 'logicalDate': '1866-01-01'},\n", " 'distance': 2,\n", " 'levelOfDescription': 'collection',\n", " 'inclusiveEndDate': {'year': 1960, 'logicalDate': '1960-12-31'},\n", " 'title': 'Henry Morgenthau, Jr. Papers',\n", " 'naId': 589184},\n", " {'inclusiveStartDate': {'month': 4,\n", " 'year': 1933,\n", " 'day': 27,\n", " 'logicalDate': '1933-04-27'},\n", " 'distance': 1,\n", " 'levelOfDescription': 'series',\n", " 'creators': [{'creatorType': 'Most Recent',\n", " 'heading': 'Morgenthau, Henry, 1891-1967',\n", " 'deathDate': {'year': 1967, 'logicalDate': '1967-12-31'},\n", " 'authorityType': 'person',\n", " 'birthDate': {'year': 1891, 'logicalDate': '1891-01-01'},\n", " 'naId': 10581613}],\n", " 'inclusiveEndDate': {'month': 7,\n", " 'year': 1945,\n", " 'day': 21,\n", " 'logicalDate': '1945-07-21'},\n", " 'title': 'Diaries of Henry Morgenthau, Jr.',\n", " 'naId': 589213}],\n", " 'title': 'Volume 696, January 22 – January 26, 1944',\n", " 'naId': 28277406,\n", " 'physicalOccurrences': [{'mediaOccurrences': [{'generalMediaTypes': ['Loose Sheets'],\n", " 'specificMediaType': 'Paper'}]}]}},\n", " 'fields': {'totalDigitalObjects': [1],\n", " 'firstDigitalObject': [{'objecId': '28277407',\n", " 'objectUrl': 'https://s3.amazonaws.com/NARAprodstorage/opastorage/live/6/2774/28277406/content/presidential-libraries/roosevelt/FDR-MORGEN/589213/md0981.pdf',\n", " 'objectType': 'Portable Document File (PDF)'}]},\n", " 'sort': [28.70905, '28277406']}" ] },
"execution_count": 22, "metadata": {}, "output_type": "execute_result" } ],
 "source": [
  "import json\n",
  "import os\n",
  "\n",
  "import requests\n",
  "\n",
  "# NARA Catalog API v2 search request, narrowed to the single fileUnit naId 28277406\n",
  "SEARCH_URL = \"https://catalog.archives.gov/api/v2/records/search?q=*&abbreviated=true&limit=200&naId=28277406\"\n",
  "# Read the key from the NARA_API_KEY environment variable so a real key is never saved in the notebook;\n",
  "# the original \"API-KEY\" placeholder is still used when that variable is not set.\n",
  "API_KEY = os.environ.get(\"NARA_API_KEY\", \"API-KEY\")\n",
  "# Seconds requests waits on the server before raising, instead of blocking the kernel indefinitely\n",
  "REQUEST_TIMEOUT = 60\n",
  "\n",
  "response = requests.get(SEARCH_URL,\n",
  "                        headers={\"Content-Type\": \"application/json\",\n",
  "                                 \"x-api-key\": API_KEY},\n",
  "                        timeout=REQUEST_TIMEOUT)\n",
  "# Stop on an HTTP error status instead of trying to walk an error response as if it were a result\n",
  "response.raise_for_status()\n",
  "data = response.json()\n",
  "\n",
  "# Keep a local copy of the raw API response\n",
  "with open('data.FU.json', 'w') as raw_file:\n",
  "    json.dump(data, raw_file)\n",
  "\n",
  "# Going down the JSON tree to the RESULT section:\n",
  "hits = data[\"body\"][\"hits\"][\"hits\"]\n",
  "if not hits:\n",
  "    raise LookupError(\"The NARA Catalog search returned no records for naId 28277406\")\n",
  "result = hits[0]\n",
  "fields = result[\"fields\"]\n",
  "record = result[\"_source\"][\"record\"]\n",
  "\n",
  "result"
 ] },
 { "cell_type": "markdown", "id": "af7ea26f-eef5-4994-a41b-ed8b9a110268", "metadata": {},
 "source": [
  "## How to extract the TITLE value from the JSON tree"
 ] },
 { "cell_type": "code", "execution_count": 20, "id": "76320f0e-03c2-4e93-8268-bf64c4fda1ed", "metadata": { "tags": [] },
 "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "- TITLE VALUE: Volume 696, January 22 – January 26, 1944\n" ] } ],
 "source": [
  "# The title sits directly under the RECORD section of the result\n",
  "title_value = record[\"title\"]\n",
  "\n",
  "# Printing the Title Value\n",
  "print(\"- TITLE VALUE:\", title_value)"
 ] },
 { "cell_type": "markdown", "id": "7d0d5b50-207b-4630-89a1-ef23ae5276a6", "metadata": {},
 "source": [
  "## How to extract the URL value from the firstDigitalObject entry of the FIELDS section of the JSON tree"
 ] },
 { "cell_type": "code", "execution_count": 27, "id": "7b9c740f-6831-4ad6-9948-be3e5dd14a5e", "metadata": { "tags": [] },
 "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "- URL VALUE: https://s3.amazonaws.com/NARAprodstorage/opastorage/live/6/2774/28277406/content/presidential-libraries/roosevelt/FDR-MORGEN/589213/md0981.pdf\n", "- NAME VALUE: 28277407\n" ] } ],
 "source": [
  "# Going down to the first entry of firstDigitalObject in the FIELDS section of the JSON tree\n",
  "# (named digital_object so the Python builtin `object` is not shadowed)\n",
  "digital_object = fields[\"firstDigitalObject\"][0]\n",
  "\n",
  "# Grabbing the objectUrl value, which gives us the location of the file\n",
  "object_url = digital_object[\"objectUrl\"]\n",
  "print(\"- URL VALUE:\", object_url)\n",
  "\n",
  "# Grabbing the object ID (the API response spells this key 'objecId')\n",
  "object_id = digital_object[\"objecId\"]\n",
  "print(\"- NAME VALUE:\", object_id)"
 ] },
 { "cell_type": "markdown", "id": "bf2f9f7e-dce6-4792-ad18-bd947a00e007", "metadata": {},
 "source": [
  "## ==> Follow the URL link and \"GET\" the actual .PDF file for the entire Morgenthau Diary for Volume 696 and download it locally"
 ] },
 { "cell_type": "code", "execution_count": 29, "id": "de052ea4-8413-40ee-a7d3-8194217d8e6d", "metadata": { "tags": [] },
 "outputs": [ { "data": { "text/plain": [ "20024779" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ],
 "source": [
  "# We then get that URL\n",
  "r = requests.get(object_url, allow_redirects=True, timeout=REQUEST_TIMEOUT)\n",
  "# Stop on an HTTP error status so an error page is never saved as 696.pdf\n",
  "r.raise_for_status()\n",
  "\n",
  "# and finally open and write the .PDF file; the with-block closes the file handle\n",
  "with open('696.pdf', 'wb') as pdf_file:\n",
  "    bytes_written = pdf_file.write(r.content)\n",
  "\n",
  "# Display the number of bytes written\n",
  "bytes_written"
 ] },
 { "cell_type": "code", "execution_count": null, "id": "saving-convenience", "metadata": {}, "outputs": [], "source": [] } ],
 "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }