{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Sydney Stock Exchange trading activity, 1901–1950\n", "\n", "These charts show the amount of trading activity recorded each day by the Sydney Stock Exchange. At the end of each trading session, details of any activity were recorded by hand on printed sheets, under the headings 'Buyers', 'Sellers', and 'Business done' or 'Sales' (the column headings changed over time). Using computer vision technology and the Amazon Textract service, the printed labels and handwritten entries have been extracted from the sheets and compiled into a tabular format. This data is far from perfect, and is still being checked and cleaned. Some values will be missing altogether, particularly on pages with dense, overlapping annotations. However, even with these caveats, the raw number of handwritten entries in the 'Buyers', 'Sellers', and 'Sales' columns can show patterns in trading activity over time.\n", "\n", "The charts show the proportion of stocks each day that have prices recorded. Click on any of the points on the charts to view activity on a particular date." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%%capture\n", "import altair as alt\n", "import pandas as pd\n", "from IPython.display import display\n", "\n", "alt.data_transformers.enable(\"default\")\n", "\n", "\n", "def blank_href():\n", " return {\"usermeta\": {\"embedOptions\": {\"loader\": {\"target\": \"_blank\"}}}}\n", "\n", "\n", "# register the custom theme under a chosen name\n", "alt.themes.register(\"blank_href\", blank_href)\n", "\n", "# enable the newly registered theme\n", "alt.themes.enable(\"blank_href\")\n", "\n", "CLOUDSTOR_SHARE_LINK = \"https://cloudstor.aarnet.edu.au/plus/s/RwRrCpisBac7N38\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def calculate_activity_by_day(year):\n", " # df = pd.read_csv(Path('compiled-ocr-data', f'{year}-textract.csv'), parse_dates=['date'])\n", " df = pd.read_csv(\n", " f\"{CLOUDSTOR_SHARE_LINK}/download?files={year}-textract.csv\",\n", " parse_dates=[\"date\"],\n", " dtype={\n", " \"label\": \"object\",\n", " \"buyers\": \"object\",\n", " \"sellers\": \"object\",\n", " \"sales\": \"object\",\n", " },\n", " )\n", " dfs = []\n", " total = df[\"date\"].value_counts().to_frame()\n", " total.columns = [\"total\"]\n", " # any_activity = df.loc[(df['sales'].notnull()) | (df['buyers'].notnull()) | (df['sellers'].notnull())]['date'].value_counts().to_frame()\n", " any_activity = (\n", " df.loc[(df[\"buyers\"].notnull()) | (df[\"sellers\"].notnull())][\"date\"]\n", " .value_counts()\n", " .to_frame()\n", " )\n", " any_activity.columns = [\"count\"]\n", " any_activity[\"activity\"] = \"any\"\n", " any_totals = pd.concat([any_activity, total], axis=1)\n", " r = pd.date_range(\n", " start=f\"{any_totals.index.min().year}-01-01\",\n", " end=f\"{any_totals.index.max().year}-12-31\",\n", " )\n", " any_totals = any_totals.reindex(r)\n", " any_totals[\"activity\"].fillna(\"any\", inplace=True)\n", " any_totals.fillna(0, inplace=True)\n", " dfs.append(any_totals)\n", " buyers = df.loc[df[\"buyers\"].notnull()][\"date\"].value_counts().to_frame()\n", " buyers.columns = [\"count\"]\n", " buyers[\"activity\"] = \"buyers\"\n", " dfs.append(pd.concat([buyers, total], axis=1))\n", " sellers = df.loc[df[\"sellers\"].notnull()][\"date\"].value_counts().to_frame()\n", " sellers.columns = [\"count\"]\n", " sellers[\"activity\"] = \"sellers\"\n", " dfs.append(pd.concat([sellers, total], axis=1))\n", " sales = df.loc[df[\"sales\"].notnull()][\"date\"].value_counts().to_frame()\n", " sales.columns = [\"count\"]\n", " sales[\"activity\"] = \"sales\"\n", " dfs.append(pd.concat([sales, total], axis=1))\n", " combined = pd.concat(dfs).rename_axis(\"date\").reset_index()\n", " # combined['year'] = year\n", " # display(combined)\n", " return combined" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "tags": [], "usermeta": { "embedOptions": { "theme": "dark" } } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "for year in range(1901, 1951):\n", " df = calculate_activity_by_day(year)\n", "\n", " chart = (\n", " alt.Chart(df.loc[df[\"activity\"].isin([\"buyers\", \"sellers\", \"sales\"])])\n", " .transform_calculate(\n", " ratio=\"datum.count / datum.total\",\n", " iso_date='timeFormat(datum.date, \"%Y-%m-%d\")',\n", " url=\"https://sydney-stock-exchange-xqtkxtd5za-ts.a.run.app/stock_exchange/stocks?date__exact=\"\n", " + alt.datum.iso_date\n", " + \"&\"\n", " + alt.datum.activity\n", " + \"__notblank=1\",\n", " )\n", " .mark_point()\n", " .encode(\n", " x=alt.X(\"date:T\", axis=alt.Axis(grid=False, format=\"%B\"), title=\"Date\"),\n", " y=alt.Y(\n", " \"ratio:Q\",\n", " axis=alt.Axis(format=\"%\", title=\"Percentage of stocks active\"),\n", " ),\n", " color=alt.Color(\n", " \"activity:N\",\n", " sort=[\"buyers\", \"sellers\", \"sales\"],\n", " legend=alt.Legend(title=\"Activity\"),\n", " ),\n", " href=\"url:N\",\n", " tooltip=[\n", " alt.Tooltip(\"date\", format=\"%a, %e %b %Y\"),\n", " alt.Tooltip(\"ratio:Q\", format=\".2%\", title=\"percent of stocks active\"),\n", " alt.Tooltip(\"count\", title=\"number of stocks active\"),\n", " \"activity\",\n", " ],\n", " )\n", " .interactive()\n", " )\n", "\n", " chart2 = (\n", " alt.Chart(df.loc[df[\"activity\"] == \"any\"])\n", " .transform_calculate(\n", " ratio=\"datum.count / datum.total\",\n", " )\n", " .mark_area(opacity=0.2, color=\"grey\")\n", " .encode(\n", " x=\"date:T\",\n", " y=\"ratio:Q\",\n", " tooltip=[\n", " alt.Tooltip(\"date\", format=\"%a, %e %b %Y\"),\n", " alt.Tooltip(\"ratio:Q\", format=\".2%\", title=\"percent of stocks active\"),\n", " \"count\",\n", " \"activity:N\",\n", " ],\n", " )\n", " .interactive()\n", " )\n", "\n", " display(\n", " (chart2 + chart)\n", " .configure_title(fontSize=20)\n", " .properties(width=800, height=300, title=str(year), padding=20)\n", " )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 4 }