{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyze traffic patterns for bioRxiv preprints" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas\n", "import seaborn\n", "import numpy" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Read the two tab-separated input tables.\n", "preprint_df = pandas.read_csv(\n", "    'data/01.preprints.tsv',\n", "    sep='\\t',\n", ")\n", "# 'Int64' (capital I) is the pandas nullable integer dtype.\n", "traffic_df = pandas.read_csv(\n", "    'data/01.preprint-traffic.tsv.xz',\n", "    sep='\\t',\n", "    dtype={'months_since_journal': 'Int64'},\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | rxivist_preprint_id | \n", "period | \n", "preprint_views | \n", "preprint_downloads | \n", "months_since_preprint | \n", "months_since_journal | \n", "preprint_download_ratio | \n", "
---|---|---|---|---|---|---|---|
7 | \n", "387 | \n", "2018-08 | \n", "399 | \n", "42 | \n", "0 | \n", "-3 | \n", "0.095238 | \n", "
8 | \n", "387 | \n", "2018-09 | \n", "75 | \n", "38 | \n", "1 | \n", "-2 | \n", "0.336283 | \n", "