{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GESIS Binder Stats\n",
    "\n",
    "This notebook demonstrates how to use the [GESIS Gallery API](https://notebooks.gesis.org/gallery/api/v1.0/) to analyse the repositories launched on GESIS Binder.\n",
    "It is based on https://github.com/betatim/binderlyzer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import datetime\n",
    "import pandas as pd\n",
    "import requests\n",
    "import time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fetching the launch data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The gallery database holds launches from 2019-05-10 onwards, so start one day\n",
    "# earlier to make sure that every stored launch is included.\n",
    "from_dt = datetime.datetime(2019, 5, 9).isoformat()\n",
    "url = f'https://notebooks.gesis.org/gallery/api/v1.0/launches/{from_dt}/'\n",
    "\n",
    "# Messages the API sends instead of data when a query rate limit has been hit.\n",
    "RATE_LIMIT_MESSAGES = ('2 per 1 second', '100 per 1 minute')\n",
    "# Seconds to wait for a response before giving up on a request.\n",
    "REQUEST_TIMEOUT = 30"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "launches = []\n",
    "# The API is paginated (100 results per page), so every page has to be\n",
    "# requested to get the complete data set.\n",
    "next_page = 1\n",
    "while next_page is not None:\n",
    "    r = requests.get(url, params={'page': next_page}, timeout=REQUEST_TIMEOUT)\n",
    "    try:\n",
    "        response = r.json()\n",
    "    except ValueError:\n",
    "        # The body is not JSON: report the HTTP error if there is one,\n",
    "        # otherwise re-raise the decoding error.\n",
    "        r.raise_for_status()\n",
    "        raise\n",
    "    # check the limit of queries per second/minute\n",
    "    if response.get('message', '') in RATE_LIMIT_MESSAGES:\n",
    "        # rate limited: wait, then request the same page again\n",
    "        time.sleep(1)\n",
    "        continue\n",
    "    # Fail with a clear HTTP error rather than a KeyError on 'launches'.\n",
    "    r.raise_for_status()\n",
    "    launches.extend(response['launches'])\n",
    "    next_page = response['next_page']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "launches_df = pd.DataFrame(launches)\n",
    "launches_df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the spec at its first / last slash to make it easier to grab the\n",
    "# org, repo and ref. launches_df itself is left untouched, so this cell can\n",
    "# be re-run safely. A spec without any '/' gets a missing ref.\n",
    "spec = launches_df['spec']\n",
    "data = (launches_df\n",
    "        .assign(repo=spec.str.rsplit('/', n=1).str[0],\n",
    "                org=spec.str.split('/', n=1).str[0],\n",
    "                ref=spec.str.rsplit('/', n=1).str[1])\n",
    "        .drop(columns=['schema', 'version', 'spec']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Where are repositories hosted?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(data.groupby('provider')\n",
    "     .size()\n",
    "     .reset_index(name='Launches')\n",
    "     .sort_values('Launches', ascending=False))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Which repositories are launched most often?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# total number of launches per repo\n",
    "totals_per_repo = (data.groupby('repo')\n",
    "                       .size()\n",
    "                       .reset_index(name='repo_counts'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# total number of launches per org\n",
    "totals_per_org = (data.groupby('org')\n",
    "                      .size()\n",
    "                      .reset_index(name='org_counts'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# add the per-repo and per-org totals as new columns to every launch\n",
    "launch_counts = (data\n",
    "                 .merge(totals_per_repo, on='repo')\n",
    "                 .merge(totals_per_org, on='org'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(launch_counts.groupby(['org', 'repo', 'ref', 'repo_counts', 'org_counts'])\n",
    "     .size()\n",
    "     # give the column a nice name\n",
    "     .reset_index(name='ref_counts')\n",
    "     # sort first by org launches, then by repo launches, then by ref launches\n",
    "     .sort_values(['org_counts', 'repo_counts', 'ref_counts'],\n",
    "                  ascending=[False, False, False])\n",
    "     .set_index(['org', 'repo', 'ref'])\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}