{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Stock exchange – visualising page data" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import altair as alt\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load the page list, parsing the `date` column into datetimes\n", "df = pd.read_csv(\"complete_page_list.csv\", parse_dates=[\"date\"])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A 27244\n", "M 24538\n", "N 20971\n", "U 1\n", "Name: session, dtype: int64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of rows recorded under each session code\n", "df[\"session\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Derive the calendar year from each date for the per-year summaries below\n", "df[\"year\"] = df[\"date\"].dt.year" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Rows per year as a two-column (year, count) frame\n", "df_years = (\n", "    df[\"year\"]\n", "    .value_counts()\n", "    .rename_axis(\"year\")\n", "    .reset_index(name=\"count\")\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pages per year\n", "\n", "alt.Chart(df_years).mark_bar().encode(\n", "    x=\"year:N\",\n", "    y=\"count:Q\",\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# One row per distinct date (held in column 0), tagged with its year\n", "df_dates = pd.DataFrame(df[\"date\"].unique()).assign(\n", "    year=lambda dates: dates[0].dt.year\n", ")\n", "# Distinct dates per year as a two-column (year, count) frame\n", "df_years_dates = (\n", "    df_dates[\"year\"]\n", "    .value_counts()\n", "    .rename_axis(\"year\")\n", "    .reset_index(name=\"count\")\n", ")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Days per year\n", "\n", "alt.Chart(df_years_dates).mark_bar().encode(\n", "    x=\"year:N\",\n", "    y=\"count:Q\",\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Rank of each session code; the stacked chart orders bar segments by it\n", "orders = {\"M\": 1, \"N\": 2, \"A\": 3, \"U\": 4}\n", "# Rows per (year, session) pair as a flat year / session / count frame\n", "df_sessions = (\n", "    df.groupby(by=\"year\")[\"session\"]\n", "    .value_counts()\n", "    .reset_index(name=\"count\")\n", ")\n", "df_sessions[\"order\"] = [orders[code] for code in df_sessions[\"session\"]]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pages per year, by session\n", "\n", "alt.Chart(df_sessions).mark_bar().encode(\n", " x=\"year:N\",\n", " y=\"count:Q\",\n", " color=\"session:N\",\n", " order=alt.Order(\n", " # Sort the segments of the bars by this field\n", " \"order\",\n", " sort=\"ascending\",\n", " ),\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "Created by [Tim Sherratt](https://timsherratt.org) for the [GLAM Workbench](https://glam-workbench.github.io/)." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 4 }