{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## PySAL Change Log Statistics\n", "\n", "This notebook generates the summary statistics for a package. \n", "\n", "It assumes you are running this under the `tools` directory at the top level of the package\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Change the values only in the next cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:03.766277Z", "start_time": "2020-10-22T21:36:03.763668Z" } }, "outputs": [], "source": [ "package_name = \"spaghetti\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**This notebook will generate a file in the current directory with the name ``changelog.md``. You can edit and append this in front of the ``CHANGELOG.md`` file for the package release.**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.090911Z", "start_time": "2020-10-22T21:36:03.768293Z" } }, "outputs": [], "source": [ "from __future__ import print_function\n", "import os\n", "import json\n", "import re\n", "import sys\n", "import pandas\n", "\n", "from datetime import datetime, timedelta\n", "from time import sleep\n", "from subprocess import check_output, Popen, PIPE\n", "try:\n", "    from urllib import urlopen\n", "except ImportError:\n", "    from urllib.request import urlopen\n", "\n", "import ssl\n", "import yaml\n", "\n", "context = ssl._create_unverified_context()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.434941Z", "start_time": "2020-10-22T21:36:04.093347Z" } }, "outputs": [], "source": [ "# get date of last tag\n", "x, err = Popen(\n", "    'git log -1 --tags --simplify-by-decoration --pretty=\"%ai\"| cat',\n", "    stdin=PIPE,\n", "    stdout=PIPE,\n", "    stderr=PIPE,\n", "    shell=True\n", ").communicate()\n", "start_date = x.split()[0].decode(\"utf-8\")" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.442244Z", "start_time": "2020-10-22T21:36:04.439905Z" } }, "outputs": [], "source": [ "# today's date\n", "release_date = str(datetime.today()).split()[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.451770Z", "start_time": "2020-10-22T21:36:04.443871Z" } }, "outputs": [], "source": [ "CWD = os.path.abspath(os.path.curdir)\n", "CWD" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.459442Z", "start_time": "2020-10-22T21:36:04.455310Z" } }, "outputs": [], "source": [ "since_date = '--since=\"{start}\"'.format(start=start_date)\n", "since_date\n", "since = datetime.strptime(start_date+\" 0:0:0\", \"%Y-%m-%d %H:%M:%S\")\n", "since" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.464127Z", "start_time": "2020-10-22T21:36:04.461553Z" } }, "outputs": [], "source": [ "# get __version__\n", "f = \"../{package}/__init__.py\".format(package=package_name)\n", "with open(f, 'r') as initfile:\n", " exec(initfile.readline()) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Total commits by subpackage" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.541685Z", "start_time": "2020-10-22T21:36:04.465876Z" } }, "outputs": [], "source": [ "cmd = [\"git\", \"log\", \"--oneline\", since_date]\n", "ncommits = len(check_output(cmd).splitlines())\n", "ncommits" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## List Contributors" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Some of our contributors have many aliases for the same identity. So, we've added a mapping to make sure that individuals are listed once (and only once). 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.547287Z", "start_time": "2020-10-22T21:36:04.543555Z" } }, "outputs": [], "source": [ "identities = {\"Levi John Wolf\": (\"ljwolf\", \"Levi John Wolf\"),\n", " \"Serge Rey\": (\"Serge Rey\", \"Sergio Rey\", \"sjsrey\", \"serge\"),\n", " \"Wei Kang\": (\"Wei Kang\", \"weikang9009\"),\n", " \"Dani Arribas-Bel\": (\"Dani Arribas-Bel\", \"darribas\")\n", "}\n", "\n", "def regularize_identity(string):\n", " string = string.decode()\n", " for name, aliases in identities.items():\n", " for alias in aliases:\n", " if alias in string:\n", " string = string.replace(alias, name)\n", " if len(string.split(\" \"))>1:\n", " string = string.title()\n", " return string.lstrip(\"* \")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.551029Z", "start_time": "2020-10-22T21:36:04.548928Z" } }, "outputs": [], "source": [ "author_cmd = [\"git\", \"log\", \"--format=* %aN\", since_date]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.555162Z", "start_time": "2020-10-22T21:36:04.552536Z" } }, "outputs": [], "source": [ "from collections import Counter" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.602226Z", "start_time": "2020-10-22T21:36:04.557700Z" } }, "outputs": [], "source": [ "ncommits = len(check_output(cmd).splitlines())\n", "all_authors = check_output(author_cmd).splitlines()\n", "counter = Counter([regularize_identity(author) for author in all_authors])\n", "unique_authors = sorted(set(all_authors))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.607165Z", "start_time": "2020-10-22T21:36:04.604087Z" } }, "outputs": [], "source": [ "unique_authors = counter.keys()\n", "unique_authors" ] 
}, { "cell_type": "markdown", "metadata": {}, "source": [ "## Disaggregate by PR, Issue" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.611966Z", "start_time": "2020-10-22T21:36:04.608793Z" } }, "outputs": [], "source": [ "from datetime import datetime, timedelta\n", "ISO8601 = \"%Y-%m-%dT%H:%M:%SZ\"\n", "PER_PAGE = 100\n", "element_pat = re.compile(r'<(.+?)>')\n", "rel_pat = re.compile(r'rel=[\\'\"](\\w+)[\\'\"]')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:04.622437Z", "start_time": "2020-10-22T21:36:04.613663Z" } }, "outputs": [], "source": [ "def parse_link_header(headers):\n", " link_s = headers.get('link', '')\n", " urls = element_pat.findall(link_s)\n", " rels = rel_pat.findall(link_s)\n", " d = {}\n", " for rel,url in zip(rels, urls):\n", " d[rel] = url\n", " return d\n", "\n", "def get_paged_request(url):\n", " \"\"\"get a full list, handling APIv3's paging\"\"\"\n", " results = []\n", " while url:\n", " f = urlopen(url)\n", " results.extend(json.load(f))\n", " links = parse_link_header(f.headers)\n", " url = links.get('next')\n", " return results\n", "\n", "def get_issues(project=\"pysal/pysal\", state=\"closed\", pulls=False):\n", " \"\"\"Get a list of the issues from the Github API.\"\"\"\n", " which = \"pulls\" if pulls else \"issues\"\n", " url = \"https://api.github.com/repos/%s/%s?state=%s&per_page=%i\" % (project, which, state, PER_PAGE)\n", " return get_paged_request(url)\n", "\n", "def _parse_datetime(s):\n", " \"\"\"Parse dates in the format returned by the Github API.\"\"\"\n", " if s:\n", " return datetime.strptime(s, ISO8601)\n", " else:\n", " return datetime.fromtimestamp(0)\n", "\n", "def issues2dict(issues):\n", " \"\"\"Convert a list of issues to a dict, keyed by issue number.\"\"\"\n", " idict = {}\n", " for i in issues:\n", " idict[i[\"number\"]] = i\n", " return idict\n", "\n", "def 
is_pull_request(issue):\n", "    \"\"\"Return True if the given issue is a pull request.\"\"\"\n", "    # GitHub API v3 marks PR-backed issues with a 'pull_request' key\n", "    return \"pull_request\" in issue\n", "\n", "def issues_closed_since(period=timedelta(days=365), project=\"pysal/pysal\", pulls=False):\n", "    \"\"\" Get all issues closed since a particular point in time. period\n", "    can either be a datetime object, or a timedelta object. In the\n", "    latter case, it is used as a time before the present.\n", "    \"\"\"\n", "    which = \"pulls\" if pulls else \"issues\"\n", "    if isinstance(period, timedelta):\n", "        period = datetime.now() - period\n", "    url = \"https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i\" % (project, which, period.strftime(ISO8601), PER_PAGE)\n", "    allclosed = get_paged_request(url)\n", "    filtered = [i for i in allclosed if _parse_datetime(i[\"closed_at\"]) > period]\n", "    # exclude rejected PRs\n", "    if pulls:\n", "        filtered = [ pr for pr in filtered if pr['merged_at'] ]\n", "    return filtered\n", "\n", "def sorted_by_field(issues, field=\"closed_at\", reverse=False):\n", "    \"\"\"Return a list of issues sorted by closing date.\"\"\"\n", "    return sorted(issues, key = lambda i:i[field], reverse=reverse)\n", "\n", "def report(issues, show_urls=False):\n", "    \"\"\"Summary report about a list of issues, printing number and title.\"\"\"\n", "    # Python 3 print handles unicode titles natively; no encoding needed\n", "    if show_urls:\n", "        for i in issues:\n", "            role = \"ghpull\" if \"merged_at\" in i else \"ghissue\"\n", "            print(\"* :%s:`%d`: %s\" % (role, i[\"number\"], i[\"title\"]))\n", "    else:\n", "        for i in issues:\n", "            print(\"* %d: %s\" % (i[\"number\"], i[\"title\"]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.912753Z", "start_time": "2020-10-22T21:36:04.625031Z" } }, "outputs": [], "source": [ "all_issues = {}\n", "all_pulls = {}\n", "total_commits = 0\n", "prj = 
\"pysal/{package}\".format(package=package_name)\n", "issues = issues_closed_since(since, project=prj,pulls=False)\n", "pulls = issues_closed_since(since, project=prj,pulls=True)\n", "issues = sorted_by_field(issues, reverse=True)\n", "pulls = sorted_by_field(pulls, reverse=True)\n", "n_issues, n_pulls = map(len, (issues, pulls))\n", "n_total = n_issues + n_pulls" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.920905Z", "start_time": "2020-10-22T21:36:15.915824Z" } }, "outputs": [], "source": [ "issue_listing = []\n", "for issue in issues:\n", " entry = \"{title} (#{number})\".format(title=issue[\"title\"],number=issue[\"number\"])\n", " issue_listing.append(entry)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.932090Z", "start_time": "2020-10-22T21:36:15.926654Z" } }, "outputs": [], "source": [ "pull_listing = []\n", "for pull in pulls:\n", " entry = \"{title} (#{number})\".format(title=pull[\"title\"],number=pull[\"number\"])\n", " pull_listing.append(entry)\n", "pull_listing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.937741Z", "start_time": "2020-10-22T21:36:15.934886Z" } }, "outputs": [], "source": [ "message = \"We closed a total of {total} issues (enhancements and bug fixes) through {pr} pull requests\".format(total=n_total, pr=n_pulls)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.942999Z", "start_time": "2020-10-22T21:36:15.939328Z" } }, "outputs": [], "source": [ "message = \"{msg}, since our last release on {previous}.\".format(msg=message, previous=str(start_date))\n", "message" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.946995Z", "start_time": "2020-10-22T21:36:15.944329Z" } }, "outputs": [], 
"source": [ "message += \"\\n\\n## Issues Closed\\n\"\n", "message" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.953600Z", "start_time": "2020-10-22T21:36:15.948826Z" } }, "outputs": [], "source": [ "issues = \"\\n\".join([\" - \"+issue for issue in issue_listing])\n", "message += issues\n", "message += \"\\n\\n## Pull Requests\\n\"\n", "pulls = \"\\n\".join([\" - \"+pull for pull in pull_listing])\n", "message += pulls\n", "message" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.958242Z", "start_time": "2020-10-22T21:36:15.955072Z" } }, "outputs": [], "source": [ "people = \"\\n\".join([\" - \"+person for person in unique_authors])\n", "people" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.964776Z", "start_time": "2020-10-22T21:36:15.960214Z" } }, "outputs": [], "source": [ "message += \"\\n\\nThe following individuals contributed to this release:\\n\\n{people}\".format(people=people)\n", "message" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.970565Z", "start_time": "2020-10-22T21:36:15.967251Z" } }, "outputs": [], "source": [ "head = \"# Changes\\n\\nVersion {version} ({release_date})\\n\\n\".format(version=__version__, release_date=release_date)\n", "print(head+message)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-10-22T21:36:15.975157Z", "start_time": "2020-10-22T21:36:15.972253Z" } }, "outputs": [], "source": [ "outfile = \"changelog.md\"\n", "with open(outfile, \"w\") as of:\n", "    of.write(head+message)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---------------------" ] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:py3_spgh_dev]", "language": "python", 
"name": "conda-env-py3_spgh_dev-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 2 }