{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sydney Stock Exchange – view sheets \n",
    "\n",
    "Select a date range to view details of available sheets."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This notebook is designed to run in Voila as an app (with the code hidden).\n",
    "# To launch this notebook in Voila, just select 'View > Open with Voila in New Browser Tab'\n",
    "# Your browser might ask for permission to open the new tab as a popup."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import ipywidgets as widgets\n",
    "import arrow\n",
    "import datetime\n",
    "from pathlib import Path\n",
    "from IPython.display import display, HTML, Image\n",
    "from urllib.parse import quote_plus\n",
    "import json\n",
    "from collections import Counter\n",
    "import re\n",
    "# `pages_per_vol` (used by get_pages) is not defined in this notebook,\n",
    "# so it is expected to come from this star import.\n",
    "from page_data_master import *\n",
    "\n",
    "CLOUDSTOR_URL = 'https://cloudstor.aarnet.edu.au/plus/s/i02k4gxeEpMAUkm'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_pages(vol_num, page_num):\n",
    "    \"\"\"\n",
    "    Look up the entry in `pages_per_vol` that applies to a page of a volume.\n",
    "\n",
    "    Top-level keys are '_'-separated volume numbers; a two-part key such as\n",
    "    '12_15' is treated as an inclusive range of volumes. Nested keys are page\n",
    "    ranges of the form 'first_last', where a last value of '*' is open-ended.\n",
    "\n",
    "    Parameters:\n",
    "        vol_num: volume number (int)\n",
    "        page_num: page number (int)\n",
    "\n",
    "    Returns:\n",
    "        The value stored for the first matching volume and page range,\n",
    "        or None if nothing matches.\n",
    "    \"\"\"\n",
    "    for vol_key, page_ranges in pages_per_vol.items():\n",
    "        vols = [int(v) for v in vol_key.split('_')]\n",
    "        if len(vols) == 2:\n",
    "            # Two numbers mark the first and last volume of a range\n",
    "            vols = range(vols[0], vols[1] + 1)\n",
    "        if vol_num not in vols:\n",
    "            continue\n",
    "        for range_key, settings in page_ranges.items():\n",
    "            bounds = range_key.split('_')\n",
    "            if page_num < int(bounds[0]):\n",
    "                continue\n",
    "            # '*' means there is no upper limit on the page number\n",
    "            if bounds[1] == '*' or page_num <= int(bounds[1]):\n",
    "                return settings\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the list of dates\n",
    "df_dates = pd.read_csv('complete_date_list.csv', parse_dates=['date'])\n",
    "# Get the list of pages\n",
    "df_pages = pd.read_csv('complete_page_list.csv', parse_dates=['date'])\n",
    "# Merge dates and pages on the date field\n",
    "df = pd.merge(df_dates, df_pages, how='left', on='date').sort_values(by=['date', 'page_num'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_vol(date):\n",
    "    \"\"\"\n",
    "    Find the number of the volume whose date range includes `date`.\n",
    "\n",
    "    Volumes are read from 'series_list.csv'. The volume number is taken from\n",
    "    the digits that follow the last hyphen in the `Item_number` column.\n",
    "\n",
    "    Returns:\n",
    "        The volume number as an int, or None if no volume covers the date.\n",
    "    \"\"\"\n",
    "    series = pd.read_csv('series_list.csv')\n",
    "    find_date = arrow.get(date)\n",
    "    for volume in series.itertuples():\n",
    "        vol_start = arrow.get(volume.start_date, 'YYYY-MM-DD')\n",
    "        vol_end = arrow.get(volume.end_date, 'YYYY-MM-DD')\n",
    "        if vol_start <= find_date <= vol_end:\n",
    "            return int(re.search(r'-(\\d+)$', volume.Item_number).group(1))\n",
    "    return None\n",
    "\n",
    "\n",
    "def make_clickable(val):\n",
    "    \"\"\"\n",
    "    Wrap a URL in an HTML link labelled 'Download'.\n",
    "    \"\"\"\n",
    "    # target _blank to open new window\n",
    "    return f'<a target=\"_blank\" href=\"{val}\">Download</a>'\n",
    "\n",
    "\n",
    "def _expected_sessions(vol_num, page_num, date):\n",
    "    \"\"\"\n",
    "    Get the description of the sessions expected on `date`.\n",
    "\n",
    "    Returns None when get_pages() has no entry for the volume/page, or when\n",
    "    the date falls on a Sunday (only 'weekday' and 'saturday' entries are\n",
    "    looked up).\n",
    "    \"\"\"\n",
    "    expected = get_pages(vol_num, page_num)\n",
    "    if expected is None:\n",
    "        return None\n",
    "    day = date.weekday()\n",
    "    if day < 5:\n",
    "        expected_pages = expected['weekday']\n",
    "    elif day == 5:\n",
    "        expected_pages = expected['saturday']\n",
    "    else:\n",
    "        return None\n",
    "    # The second item of the entry describes the sessions\n",
    "    return expected_pages[1]\n",
    "\n",
    "\n",
    "def find_pages():\n",
    "    \"\"\"\n",
    "    Display details of the sheets for every date in the selected range.\n",
    "\n",
    "    Reads the range from the `start_date` and `end_date` widgets and writes\n",
    "    to the `results` output widget. For each date it shows the date, the\n",
    "    reason (if one is recorded), a comparison of expected and found page\n",
    "    counts, and a table of pages with download links.\n",
    "    \"\"\"\n",
    "    results.clear_output()\n",
    "    with results:\n",
    "        # A DatePicker has no value once the user clears it\n",
    "        if start_date.value is None or end_date.value is None:\n",
    "            display(HTML('<p>Please select both a start date and an end date.</p>'))\n",
    "            return\n",
    "        range_start = pd.Timestamp(start_date.value)\n",
    "        range_end = pd.Timestamp(end_date.value)\n",
    "        in_range = (df_dates['date'] >= range_start) & (df_dates['date'] <= range_end)\n",
    "        for date in df_dates.loc[in_range].itertuples():\n",
    "            display(HTML(f'<h3>{arrow.get(date.date).format(\"D MMMM YYYY\")}</h3>'))\n",
    "            if pd.notnull(date.reason):\n",
    "                display(HTML(f'<p>{date.reason}</p>'))\n",
    "            if not date.pages > 0:\n",
    "                continue\n",
    "            pages = df_pages.loc[df_pages['date'] == date.date]\n",
    "            if pages.empty:\n",
    "                # The date list reports pages, but the page list has none\n",
    "                continue\n",
    "            first_page = int(pages.iloc[0]['page_num'])\n",
    "            vol_num = int(pages.iloc[0]['vol_num'])\n",
    "\n",
    "            cloudstor_folder_url = f'{CLOUDSTOR_URL}/download?path=AU%20NBAC%20N193-{vol_num:03}'\n",
    "            pages = pages.assign(image_url=[\n",
    "                f'{cloudstor_folder_url}&files=N193-{vol_num:03}_{int(page_num):04}.jpg'\n",
    "                for page_num in pages['page_num']\n",
    "            ])\n",
    "\n",
    "            # Get the expected sessions for this day\n",
    "            expected_sessions = _expected_sessions(vol_num, first_page, date.date)\n",
    "            if expected_sessions is None:\n",
    "                expected_sessions = 'unknown'\n",
    "            summary = f'Number of pages: {date.expected} expected ({expected_sessions}) / {int(date.pages)} found ({pages[\"session\"].str.cat()})'\n",
    "            if date.expected != date.pages:\n",
    "                # Highlight days where the page count differs from what was expected\n",
    "                display(HTML(f'<p style=\"color: red;\">{summary}</p>'))\n",
    "            else:\n",
    "                display(HTML(f'<p>{summary}</p>'))\n",
    "            display(pages.style.format({'image_url': make_clickable}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def start(b):\n",
    "    \"\"\"\n",
    "    Click handler for the 'Find pages' button; `b` is the clicked button.\n",
    "    \"\"\"\n",
    "    find_pages()\n",
    "\n",
    "start_date = widgets.DatePicker(\n",
    "    description='Start date',\n",
    "    disabled=False,\n",
    "    value=datetime.date(1901, 1, 1)\n",
    ")\n",
    "\n",
    "end_date = widgets.DatePicker(\n",
    "    description='End date',\n",
    "    disabled=False,\n",
    "    value=datetime.date(1901, 6, 30)\n",
    ")\n",
    "\n",
    "find = widgets.Button(\n",
    "    description='Find pages',\n",
    "    disabled=False,\n",
    "    button_style='primary', # 'success', 'info', 'warning', 'danger' or ''\n",
    "    tooltip='Click me',\n",
    "    icon='search'\n",
    ")\n",
    "\n",
    "find.on_click(start)\n",
    "results = widgets.Output()\n",
    "display(widgets.HBox([widgets.VBox([start_date, end_date]), find]))\n",
    "display(results)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "Created by [Tim Sherratt](https://timsherratt.org) for the [GLAM Workbench](https://glam-workbench.github.io/)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "voila": {
   "template": "material"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}