{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Download the contents of a digitised file from the National Archives of Australia\n", "\n", "**Why?** RecordSearch lets you download a PDF of a digitised file, but sometimes it's more convenient to work with individual images.\n", "\n", "**How?** Just enter the barcode of the file in the box below and click the button. When all the images have been downloaded they'll be zipped up, and a convenient download link will be displayed.\n", "\n", "**More?** Click the 'Edit App' button at the top of the page to see how this works." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import os\n", "import shutil\n", "import requests\n", "import ipywidgets as widgets\n", "import time\n", "from slugify import slugify\n", "from IPython.display import display, HTML, FileLink\n", "from tqdm import tqdm_notebook\n", "from recordsearch_tools.client import RSItemClient\n", "from recordsearch_tools.utilities import retry" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "@retry(ConnectionError, tries=20, delay=10, backoff=1)\n", "def get_images(b):\n", " if barcode.value:\n", " client = RSItemClient()\n", " item = client.get_summary(entity_id=barcode.value)\n", " if item['digitised_pages'] > 0:\n", " series = slugify(item['series'])\n", " control = slugify(item['control_symbol'])\n", " directory = os.path.join('data', 'images', '{}-{}-[{}]'.format(series, control, item['identifier']))\n", " if not os.path.exists(directory):\n", " os.makedirs(directory)\n", " for page in tqdm_notebook(range(1, item['digitised_pages'] + 1)):\n", " filename = '{}/{}-p{}.jpg'.format(directory, item['identifier'], page)\n", " if not os.path.exists(filename):\n", " img_url = 'http://recordsearch.naa.gov.au/NaaMedia/ShowImage.asp?B={}&S={}&T=P'.format(item['identifier'], page)\n", " response = requests.get(img_url, stream=True, verify=False)\n", " response.raise_for_status()\n", " with open(filename, 'wb') as out_file:\n", " shutil.copyfileobj(response.raw, out_file)\n", " time.sleep(0.5)\n", " shutil.make_archive(directory, 'zip', directory)\n", " link = FileLink('{}.zip'.format(directory))\n", " display(HTML('Download zipped images:'), FileLink('{}.zip'.format(directory)))\n", " else:\n", " print('Sorry, that item has not been digitised...')\n", " else:\n", " print('You need to provide a barcode!')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4051a10f80b9486f9e1c6237dd0b0352", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Text(value='', description='Barcode:', placeholder='Enter item barcode')" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "barcode = widgets.Text(\n", " placeholder='Enter item barcode',\n", " description='Barcode:',\n", " disabled=False\n", ")\n", "display(barcode)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0dade9b791884f90bef84e2b90fc70af", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Button(button_style='primary', description='Get images', style=ButtonStyle(), tooltip='Click to harvest images…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "button = widgets.Button(\n", " description='Get images',\n", " disabled=False,\n", " button_style='primary', # 'success', 'info', 'warning', 'danger' or ''\n", " tooltip='Click to harvest images',\n", " icon=''\n", " )\n", "button.on_click(get_images)\n", "display(button)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }