{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Sydney Stock Exchange – details by volume" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import re\n", "from pathlib import Path\n", "\n", "import arrow\n", "import pandas as pd\n", "import requests\n", "from bs4 import BeautifulSoup\n", "from IPython.display import HTML, display" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "ead_url = \"https://archivescollection.anu.edu.au/index.php/or59j;ead?sf_format=xml\"\n", "cloudstor_url = \"https://cloudstor.aarnet.edu.au/plus/s/i02k4gxeEpMAUkm\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Get the list of dates\n", "df_dates = pd.read_csv(\"complete_date_list.csv\", parse_dates=[\"date\"])\n", "# Get the list of pages\n", "df_pages = pd.read_csv(\"complete_page_list.csv\", parse_dates=[\"date\"])\n", "# Get the list of volumes\n", "df_vols = pd.read_csv(\"series_list.csv\")\n", "# Extract volume number\n", "df_vols[\"vol_num\"] = df_vols[\"Item_number\"].str.extract(r\"-(\\d+)$\").astype(\"int\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Get the list of volumes from ATOM in XML\n", "# A timeout stops the cell hanging forever if the server does not respond\n", "response = requests.get(ead_url, timeout=60)\n", "# Stop here on an HTTP error instead of parsing an error page as if it were the EAD\n", "response.raise_for_status()\n", "# NOTE(review): no parser is named, so BeautifulSoup chooses one itself; confirm which one later cells expect before pinning it\n", "soup = BeautifulSoup(response.text)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Estimated 303 of 317 pages (95.58% complete)
" ], "text/plain": [ "Estimated 312 of 314 pages (99.36% complete)
" ], "text/plain": [ "Estimated 345 of 346 pages (99.71% complete)
" ], "text/plain": [ "Estimated 312 of 319 pages (97.81% complete)
" ], "text/plain": [ "Estimated 305 of 312 pages (97.76% complete)
" ], "text/plain": [ "Estimated 334 of 341 pages (97.95% complete)
" ], "text/plain": [ "Estimated 349 of 351 pages (99.43% complete)
" ], "text/plain": [ "Estimated 317 of 319 pages (99.37% complete)
" ], "text/plain": [ "Estimated 327 of 329 pages (99.39% complete)
" ], "text/plain": [ "Estimated 327 of 329 pages (99.39% complete)
" ], "text/plain": [ "Estimated 350 of 351 pages (99.72% complete)
" ], "text/plain": [ "Estimated 310 of 319 pages (97.18% complete)
" ], "text/plain": [ "Estimated 330 of 334 pages (98.80% complete)
" ], "text/plain": [ "Estimated 329 of 329 pages (100.00% complete)
" ], "text/plain": [ "Estimated 349 of 351 pages (99.43% complete)
" ], "text/plain": [ "Estimated 312 of 314 pages (99.36% complete)
" ], "text/plain": [ "Estimated 329 of 329 pages (100.00% complete)
" ], "text/plain": [ "Estimated 322 of 324 pages (99.38% complete)
" ], "text/plain": [ "Estimated 348 of 348 pages (100.00% complete)
" ], "text/plain": [ "Estimated 312 of 312 pages (100.00% complete)
" ], "text/plain": [ "Estimated 330 of 331 pages (99.70% complete)
" ], "text/plain": [ "Estimated 314 of 324 pages (96.91% complete)
" ], "text/plain": [ "Estimated 344 of 346 pages (99.42% complete)
" ], "text/plain": [ "Estimated 310 of 312 pages (99.36% complete)
" ], "text/plain": [ "Estimated 323 of 327 pages (98.78% complete)
" ], "text/plain": [ "Estimated 332 of 336 pages (98.81% complete)
" ], "text/plain": [ "Estimated 349 of 351 pages (99.43% complete)
" ], "text/plain": [ "Estimated 314 of 319 pages (98.43% complete)
" ], "text/plain": [ "Estimated 328 of 329 pages (99.70% complete)
" ], "text/plain": [ "Estimated 327 of 329 pages (99.39% complete)
" ], "text/plain": [ "Estimated 339 of 341 pages (99.41% complete)
" ], "text/plain": [ "Estimated 316 of 319 pages (99.06% complete)
" ], "text/plain": [ "Estimated 329 of 329 pages (100.00% complete)
" ], "text/plain": [ "Estimated 322 of 324 pages (99.38% complete)
" ], "text/plain": [ "Estimated 350 of 351 pages (99.72% complete)
" ], "text/plain": [ "Estimated 317 of 319 pages (99.37% complete)
" ], "text/plain": [ "Estimated 300 of 302 pages (99.34% complete)
" ], "text/plain": [ "Estimated 328 of 331 pages (99.09% complete)
" ], "text/plain": [ "Estimated 350 of 351 pages (99.72% complete)
" ], "text/plain": [ "Estimated 317 of 319 pages (99.37% complete)
" ], "text/plain": [ "Estimated 325 of 329 pages (98.78% complete)
" ], "text/plain": [ "Estimated 318 of 322 pages (98.76% complete)
" ], "text/plain": [ "Estimated 343 of 343 pages (100.00% complete)
" ], "text/plain": [ "Estimated 317 of 317 pages (100.00% complete)
" ], "text/plain": [ "Estimated 331 of 331 pages (100.00% complete)
" ], "text/plain": [ "Estimated 317 of 319 pages (99.37% complete)
" ], "text/plain": [ "Estimated 350 of 351 pages (99.72% complete)
" ], "text/plain": [ "Estimated 314 of 314 pages (100.00% complete)
" ], "text/plain": [ "Estimated 296 of 302 pages (98.01% complete)
" ], "text/plain": [ "Estimated 337 of 341 pages (98.83% complete)
" ], "text/plain": [ "Estimated 351 of 351 pages (100.00% complete)
" ], "text/plain": [ "Estimated 313 of 317 pages (98.74% complete)
" ], "text/plain": [ "Estimated 325 of 329 pages (98.78% complete)
" ], "text/plain": [ "Estimated 315 of 319 pages (98.75% complete)
" ], "text/plain": [ "Estimated 167 of 351 pages (47.58% complete)
" ], "text/plain": [ "Estimated 315 of 319 pages (98.75% complete)
" ], "text/plain": [ "Estimated 318 of 319 pages (99.69% complete)
" ], "text/plain": [ "Estimated 309 of 309 pages (100.00% complete)
" ], "text/plain": [ "Estimated 347 of 351 pages (98.86% complete)
" ], "text/plain": [ "Estimated 313 of 319 pages (98.12% complete)
" ], "text/plain": [ "Estimated 317 of 317 pages (100.00% complete)
" ], "text/plain": [ "Estimated 312 of 314 pages (99.36% complete)
" ], "text/plain": [ "Estimated 345 of 348 pages (99.14% complete)
" ], "text/plain": [ "Estimated 309 of 313 pages (98.72% complete)
" ], "text/plain": [ "Estimated 317 of 319 pages (99.37% complete)
" ], "text/plain": [ "Estimated 307 of 314 pages (97.77% complete)
" ], "text/plain": [ "Estimated 343 of 346 pages (99.13% complete)
" ], "text/plain": [ "Estimated 313 of 317 pages (98.74% complete)
" ], "text/plain": [ "Estimated 310 of 310 pages (100.00% complete)
" ], "text/plain": [ "Estimated 325 of 326 pages (99.69% complete)
" ], "text/plain": [ "Estimated 351 of 351 pages (100.00% complete)
" ], "text/plain": [ "Estimated 296 of 302 pages (98.01% complete)
" ], "text/plain": [ "Estimated 321 of 324 pages (99.07% complete)
" ], "text/plain": [ "Estimated 314 of 314 pages (100.00% complete)
" ], "text/plain": [ "Estimated 348 of 349 pages (99.71% complete)
" ], "text/plain": [ "Estimated 306 of 309 pages (99.03% complete)
" ], "text/plain": [ "Estimated 331 of 334 pages (99.10% complete)
" ], "text/plain": [ "Estimated 309 of 309 pages (100.00% complete)
" ], "text/plain": [ "Estimated 351 of 351 pages (100.00% complete)
" ], "text/plain": [ "Estimated 317 of 319 pages (99.37% complete)
" ], "text/plain": [ "Estimated 281 of 285 pages (98.60% complete)
" ], "text/plain": [ "Estimated 333 of 336 pages (99.11% complete)
" ], "text/plain": [ "Estimated 350 of 351 pages (99.72% complete)
" ], "text/plain": [ "Estimated 312 of 314 pages (99.36% complete)
" ], "text/plain": [ "Estimated 315 of 317 pages (99.37% complete)
" ], "text/plain": [ "