{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Stock exchange – visualising page data" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from IPython.display import display, HTML\n", "import altair as alt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('complete_page_list.csv', parse_dates=['date'])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A 27244\n", "M 24538\n", "N 20971\n", "U 1\n", "Name: session, dtype: int64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['session'].value_counts()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "df['year'] = df['date'].dt.year" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "df_years = df['year'].value_counts().to_frame().reset_index()\n", "df_years.columns = ['year', 'count']" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pages per year\n", "\n", "alt.Chart(df_years).mark_bar().encode(\n", " x='year:N',\n", " y='count:Q'\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "df_dates = pd.DataFrame(df['date'].unique())\n", "df_dates['year'] = df_dates[0].dt.year\n", "df_years_dates = df_dates['year'].value_counts().to_frame().reset_index()\n", "df_years_dates.columns = ['year', 'count']" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Days per year\n", "\n", "alt.Chart(df_years_dates).mark_bar().encode(\n", " x='year:N',\n", " y='count:Q'\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "orders = {'M': 1, 'N': 2, 'A': 3, 'U': 4}\n", "df_sessions = df.groupby(by='year')['session'].value_counts().to_frame().rename(columns={'session': 'count'}, level=0).reset_index()\n", "df_sessions['order'] = df_sessions['session'].apply(lambda x: orders[x])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pages per year, by session\n", "\n", "alt.Chart(df_sessions).mark_bar().encode(\n", " x='year:N',\n", " y='count:Q',\n", " color='session:N',\n", " order=alt.Order(\n", " # Sort the segments of the bars by this field\n", " 'order',\n", " sort='ascending'\n", " )\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "Created by [Tim Sherratt](https://timsherratt.org) for the [GLAM Workbench](https://glam-workbench.github.io/)." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }