{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sydney Stock Exchange – view sheets\n",
    "\n",
    "Select a date range to view details of available sheets."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This notebook is designed to run in Voila as an app (with the code hidden).\n",
    "# To launch this notebook in Voila, just select 'View > Open with Voila in New Browser Tab'.\n",
    "# Your browser might ask for permission to open the new tab as a popup."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import re\n",
    "\n",
    "import arrow\n",
    "import ipywidgets as widgets\n",
    "import pandas as pd\n",
    "from IPython.display import HTML, display\n",
    "\n",
    "from page_data_master import pages_per_vol\n",
    "\n",
    "CLOUDSTOR_URL = \"https://cloudstor.aarnet.edu.au/plus/s/i02k4gxeEpMAUkm\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_pages(vol_num, page_num):\n",
    "    # pages_per_vol keys identify volumes (single numbers or 'start_end' ranges); each\n",
    "    # maps page ranges ('start_end' or 'start_*') to details of the expected pages and sessions.\n",
    "    for key, pages in pages_per_vol.items():\n",
    "        vols = key.split(\"_\")\n",
    "        vols = [int(y) for y in vols]\n",
    "        if len(vols) == 2:\n",
    "            vols = list(range(vols[0], vols[1] + 1))\n",
    "        if vol_num in vols:\n",
    "            for p_key, p_pages in pages.items():\n",
    "                p_range = p_key.split(\"_\")\n",
    "                if p_range[1] == \"*\":\n",
    "                    if page_num >= int(p_range[0]):\n",
    "                        return p_pages\n",
    "                else:\n",
    "                    if page_num >= int(p_range[0]) and page_num <= int(p_range[1]):\n",
    "                        return p_pages"
   ]
  },
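  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustration only (not used by the app): a minimal sketch of the structure that\n",
    "# get_pages() expects in pages_per_vol, inferred from the parsing above. The volume\n",
    "# numbers, page ranges, counts and session labels below are invented examples.\n",
    "example_structure = {\n",
    "    \"1_12\": {  # volumes 1 to 12\n",
    "        \"1_200\": {\"weekday\": (4, \"morning, noon\"), \"saturday\": (2, \"morning\")},\n",
    "        \"201_*\": {\"weekday\": (6, \"morning, noon, afternoon\"), \"saturday\": (2, \"morning\")},\n",
    "    }\n",
    "}\n",
    "# With the real data loaded above, a lookup looks like:\n",
    "# get_pages(3, 250)  # returns a dict keyed by 'weekday' and 'saturday'"
   ]
  },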
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the list of dates\n",
    "df_dates = pd.read_csv(\"complete_date_list.csv\", parse_dates=[\"date\"])\n",
    "# Get the list of pages\n",
    "df_pages = pd.read_csv(\"complete_page_list.csv\", parse_dates=[\"date\"])\n",
    "# Merge dates and pages on the date field\n",
    "df = pd.merge(df_dates, df_pages, how=\"left\", on=\"date\").sort_values(\n",
    "    by=[\"date\", \"page_num\"]\n",
    ")"
   ]
  },
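  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sanity check (illustration only, no output): the app below relies on these\n",
    "# columns, so fail early here if the layout of the CSV files ever changes.\n",
    "assert {\"date\", \"pages\", \"expected\", \"reason\"}.issubset(df_dates.columns)\n",
    "assert {\"date\", \"page_num\", \"vol_num\", \"session\"}.issubset(df_pages.columns)"
   ]
  },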
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_vol(date):\n",
    "    # Find the volume in series_list.csv whose date range includes the given date\n",
    "    series = pd.read_csv(\"series_list.csv\")\n",
    "    for volume in series.itertuples():\n",
    "        find_date = arrow.get(date)\n",
    "        start_date = arrow.get(volume.start_date, \"YYYY-MM-DD\")\n",
    "        end_date = arrow.get(volume.end_date, \"YYYY-MM-DD\")\n",
    "        if find_date >= start_date and find_date <= end_date:\n",
    "            return int(re.search(r\"-(\\d+)$\", volume.Item_number).group(1).strip())\n",
    "    return None\n",
    "\n",
    "\n",
    "def make_clickable(val):\n",
    "    # target _blank to open new window\n",
    "    return '<a target=\"_blank\" href=\"{}\">Download</a>'.format(val)\n",
    "\n",
    "\n",
    "def find_pages():\n",
    "    results.clear_output()\n",
    "    # start_date = arrow.get(start_date.value)\n",
    "    # end_date = arrow.get(end_date.value)\n",
    "    with results:\n",
    "        dates = df_dates.loc[\n",
    "            (df_dates[\"date\"] >= pd.Timestamp(start_date.value))\n",
    "            & (df_dates[\"date\"] <= pd.Timestamp(end_date.value))\n",
    "        ]\n",
    "        for date in dates.itertuples():\n",
    "            # Show the date as a heading\n",
    "            display(HTML(f'<h3>{arrow.get(date.date).format(\"D MMMM YYYY\")}</h3>'))\n",
    "            # Show any note recorded against this date\n",
    "            if pd.notnull(date.reason):\n",
    "                display(HTML(f\"<p>{date.reason}</p>\"))\n",
    "            if date.pages > 0:\n",
    "                pages = df_pages.loc[df_pages[\"date\"] == date.date].copy(deep=False)\n",
    "                first_page = pages.iloc[0][\"page_num\"]\n",
    "                vol_num = pages.iloc[0][\"vol_num\"]\n",
    "\n",
    "                cloudstor_folder_url = (\n",
    "                    f\"{CLOUDSTOR_URL}/download?path=AU%20NBAC%20N193-{vol_num:03}\"\n",
    "                )\n",
    "                pages[\"image_url\"] = pages.apply(\n",
    "                    lambda x: f'{cloudstor_folder_url}&files=N193-{int(vol_num):03}_{int(x[\"page_num\"]):04}.jpg',\n",
    "                    axis=1,\n",
    "                )\n",
    "\n",
    "                # Get the expected number of pages\n",
    "                expected = get_pages(int(vol_num), int(first_page))\n",
    "                if date.date.weekday() < 5:\n",
    "                    expected_pages = expected[\"weekday\"]\n",
    "                elif date.date.weekday() == 5:\n",
    "                    expected_pages = expected[\"saturday\"]\n",
    "                expected_sessions = expected_pages[1]\n",
    "                # Highlight dates where the number of pages found doesn't match the number expected\n",
    "                if date.expected != date.pages:\n",
    "                    display(\n",
    "                        HTML(\n",
    "                            f'<p style=\"color:red;\">Number of pages: {date.expected} expected ({expected_sessions}) / {int(date.pages)} found ({pages[\"session\"].str.cat()})</p>'\n",
    "                        )\n",
    "                    )\n",
    "                else:\n",
    "                    display(\n",
    "                        HTML(\n",
    "                            f'<p>Number of pages: {date.expected} expected ({expected_sessions}) / {int(date.pages)} found ({pages[\"session\"].str.cat()})</p>'\n",
    "                        )\n",
    "                    )\n",
    "                display(pages.style.format({\"image_url\": make_clickable}))"
   ]
  },
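  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustration only: find_vol() isn't called by the app above, but it can be used on\n",
    "# its own to look up which volume of the series covers a particular date. The date\n",
    "# below is just an example; the call returns None if no volume covers the date.\n",
    "example_vol = find_vol(\"1901-03-04\")"
   ]
  },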
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def start(b):\n",
    "    find_pages()\n",
    "\n",
    "\n",
    "start_date = widgets.DatePicker(\n",
    "    description=\"Start date\", disabled=False, value=datetime.date(1901, 1, 1)\n",
    ")\n",
    "\n",
    "end_date = widgets.DatePicker(\n",
    "    description=\"End date\", disabled=False, value=datetime.date(1901, 6, 30)\n",
    ")\n",
    "\n",
    "find = widgets.Button(\n",
    "    description=\"Find pages\",\n",
    "    disabled=False,\n",
    "    button_style=\"primary\",  # 'success', 'info', 'warning', 'danger' or ''\n",
    "    tooltip=\"Click me\",\n",
    "    icon=\"search\",\n",
    ")\n",
    "\n",
    "find.on_click(start)\n",
    "results = widgets.Output()\n",
    "display(widgets.HBox([widgets.VBox([start_date, end_date]), find]))\n",
    "display(results)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "Created by [Tim Sherratt](https://timsherratt.org) for the [GLAM Workbench](https://glam-workbench.github.io/)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  },
  "voila": {
   "template": "material"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}