{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## mapclassify Change Log Statistics\n",
    "\n",
    "This notebook generates the summary statistics for a package release.\n",
    "\n",
    "It assumes you are running it from the `tools` directory at the top level of the package.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get date of last tag\n",
    "from subprocess import Popen, PIPE\n",
    "# The command is piped through `cat`, so the shell's exit status is cat's, not git's;\n",
    "# a git failure therefore shows up as empty output rather than a non-zero return code.\n",
    "x, err = Popen('git log -1 --tags --simplify-by-decoration --pretty=\"%ai\"| cat', stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True).communicate()\n",
    "if not x.strip():\n",
    "    # Fail here with git's own message instead of an IndexError on the next line.\n",
    "    raise RuntimeError('could not determine the date of the last tag: ' + err.decode('utf-8', 'replace'))\n",
    "start_date = x.split()[0].decode('utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# today's date (YYYY-MM-DD)\n",
    "# Only `date` is imported: a later cell binds the name `datetime` to the class, so\n",
    "# `datetime.datetime.today()` would raise AttributeError if this cell were re-run.\n",
    "from datetime import date\n",
    "release_date = date.today().isoformat()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "package_name = 'mapclassify'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook will generate a file in the current directory with the name \"changelog.md\". You can edit it and prepend it to the CHANGELOG file for the package release."
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "import os\n",
    "import json\n",
    "import re\n",
    "import sys\n",
    "import pandas\n",
    "\n",
    "from collections import Counter\n",
    "from datetime import datetime, timedelta\n",
    "from time import sleep\n",
    "from subprocess import check_output\n",
    "try:\n",
    "    from urllib import urlopen\n",
    "except ImportError:\n",
    "    # Python 3: urlopen lives in urllib.request\n",
    "    from urllib.request import urlopen\n",
    "\n",
    "import ssl\n",
    "import yaml\n",
    "\n",
    "# NOTE(review): `context` is not passed to any urlopen() call in this notebook, so\n",
    "# certificate verification is not actually disabled for the requests made below.\n",
    "context = ssl._create_unverified_context()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "CWD = os.path.abspath(os.path.curdir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "CWD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "since_date = '--since=\"{start}\"'.format(start=start_date)\n",
    "since_date\n",
    "since = datetime.strptime(start_date+\" 0:0:0\", \"%Y-%m-%d %H:%M:%S\")\n",
    "since"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get __version__\n",
    "f = \"../{package}/__init__.py\".format(package=package_name)\n",
    "\n",
    "# Scan for the `__version__ = '...'` assignment instead of exec()-ing the first line:\n",
    "# no package code is executed and the assignment does not have to be on line 1.\n",
    "__version__ = None\n",
    "with open(f, 'r') as initfile:\n",
    "    for line in initfile:\n",
    "        match = re.match(r'__version__\\s*=\\s*[\\'\"]([^\\'\"]+)[\\'\"]', line)\n",
    "        if match:\n",
    "            __version__ = match.group(1)\n",
    "            break\n",
    "if __version__ is None:\n",
    "    raise RuntimeError('no __version__ assignment found in ' + f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Total commits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cmd = ['git', 'log', '--oneline', since_date]\n",
    "ncommits = len(check_output(cmd).splitlines())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ncommits"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## List Contributors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Some of our contributors have many aliases for the same identity. So, we've added a mapping to make sure that individuals are listed once (and only once)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# canonical name -> every spelling under which that person appears in the git log\n",
    "identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),\n",
    "              'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),\n",
    "              'Wei Kang': ('Wei Kang', 'weikang9009'),\n",
    "              'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas')\n",
    "}\n",
    "\n",
    "def regularize_identity(string):\n",
    "    \"\"\"Map one `git log --format='* %aN'` author line to a canonical name.\n",
    "\n",
    "    `string` may be bytes (as returned by check_output) or str. Leading\n",
    "    '*' and space characters are removed. A name that equals a known alias\n",
    "    in `identities` is replaced by its canonical spelling; any other name\n",
    "    is title-cased when the incoming line has more than one\n",
    "    space-separated token.\n",
    "    \"\"\"\n",
    "    if isinstance(string, bytes):\n",
    "        string = string.decode()\n",
    "    name = string.lstrip('* ')\n",
    "    # Compare the whole name rather than a substring, so an unrelated author\n",
    "    # such as 'sergey' is not rewritten through the 'serge' alias.\n",
    "    for canonical, aliases in identities.items():\n",
    "        if name in aliases:\n",
    "            return canonical\n",
    "    # The token count is taken on the full line (bullet included), which keeps\n",
    "    # the title-casing of '* name' lines exactly as it was before.\n",
    "    if len(string.split(' ')) > 1:\n",
    "        name = name.title()\n",
    "    return name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "author_cmd = ['git', 'log', '--format=* %aN', since_date]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_authors = check_output(author_cmd).splitlines()\n",
    "# commits per contributor, after merging aliases\n",
    "counter = Counter([regularize_identity(author) for author in all_authors])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# alphabetical, so the contributor list is stable from run to run\n",
    "unique_authors = sorted(counter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "unique_authors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Disaggregate by PR, Issue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime, timedelta\n",
    "ISO8601 = \"%Y-%m-%dT%H:%M:%SZ\"\n",
    "PER_PAGE = 100\n",
    "element_pat = re.compile(r'<(.+?)>')\n",
    "rel_pat = 
re.compile(r'rel=[\\'\"](\\w+)[\\'\"]')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_link_header(headers):\n",
    "    \"\"\"Return a {rel: url} dict parsed from the `link` response header.\"\"\"\n",
    "    link_s = headers.get('link', '')\n",
    "    urls = element_pat.findall(link_s)\n",
    "    rels = rel_pat.findall(link_s)\n",
    "    return dict(zip(rels, urls))\n",
    "\n",
    "\n",
    "def get_paged_request(url):\n",
    "    \"\"\"get a full list, handling APIv3's paging\n",
    "\n",
    "    Follows the `next` link of every response until there is none and\n",
    "    returns the concatenated list of decoded JSON items.\n",
    "    \"\"\"\n",
    "    from contextlib import closing\n",
    "\n",
    "    results = []\n",
    "    while url:\n",
    "        # closing() releases the connection even when decoding fails\n",
    "        with closing(urlopen(url)) as f:\n",
    "            results.extend(json.load(f))\n",
    "            links = parse_link_header(f.headers)\n",
    "        url = links.get('next')\n",
    "    return results\n",
    "\n",
    "\n",
    "def get_issues(project=\"pysal/pysal\", state=\"closed\", pulls=False):\n",
    "    \"\"\"Get a list of the issues from the Github API.\"\"\"\n",
    "    which = 'pulls' if pulls else 'issues'\n",
    "    url = \"https://api.github.com/repos/%s/%s?state=%s&per_page=%i\" % (project, which, state, PER_PAGE)\n",
    "    return get_paged_request(url)\n",
    "\n",
    "\n",
    "def _parse_datetime(s):\n",
    "    \"\"\"Parse dates in the format returned by the Github API.\n",
    "\n",
    "    An empty or missing value maps to the epoch (timestamp 0).\n",
    "    \"\"\"\n",
    "    if s:\n",
    "        return datetime.strptime(s, ISO8601)\n",
    "    else:\n",
    "        return datetime.fromtimestamp(0)\n",
    "\n",
    "\n",
    "def issues2dict(issues):\n",
    "    \"\"\"Convert a list of issues to a dict, keyed by issue number.\"\"\"\n",
    "    idict = {}\n",
    "    for i in issues:\n",
    "        idict[i['number']] = i\n",
    "    return idict\n",
    "\n",
    "\n",
    "def is_pull_request(issue):\n",
    "    \"\"\"Return True if the given issue is a pull request.\n",
    "\n",
    "    Pull requests returned by the issues endpoint carry a `pull_request`\n",
    "    object; the legacy `pull_request_url` key is still honoured.\n",
    "    \"\"\"\n",
    "    return bool(issue.get('pull_request')) or 'pull_request_url' in issue\n",
    "\n",
    "\n",
    "def issues_closed_since(period=timedelta(days=365), project=\"pysal/pysal\", pulls=False):\n",
    "    \"\"\"Get all issues closed since a particular point in time.\n",
    "\n",
    "    period can either be a datetime object, or a timedelta object. In the\n",
    "    latter case, it is used as a time before the present.\n",
    "\n",
    "    With pulls=False, pull requests are dropped from the result (the issues\n",
    "    endpoint lists them as well); with pulls=True, only merged pull\n",
    "    requests are returned.\n",
    "    \"\"\"\n",
    "    which = 'pulls' if pulls else 'issues'\n",
    "\n",
    "    if isinstance(period, timedelta):\n",
    "        # API timestamps are UTC ('...Z'), so measure back from the UTC present\n",
    "        period = datetime.utcnow() - period\n",
    "    url = \"https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i\" % (project, which, period.strftime(ISO8601), PER_PAGE)\n",
    "    allclosed = get_paged_request(url)\n",
    "    filtered = [i for i in allclosed if _parse_datetime(i['closed_at']) > period]\n",
    "\n",
    "    if pulls:\n",
    "        # exclude rejected PRs\n",
    "        filtered = [pr for pr in filtered if pr['merged_at']]\n",
    "    else:\n",
    "        # keep genuine issues only, so pull requests are not counted twice\n",
    "        filtered = [i for i in filtered if not is_pull_request(i)]\n",
    "\n",
    "    return filtered\n",
    "\n",
    "\n",
    "def sorted_by_field(issues, field='closed_at', reverse=False):\n",
    "    \"\"\"Return a list of issues sorted by the given field (closing date by default).\"\"\"\n",
    "    return sorted(issues, key = lambda i:i[field], reverse=reverse)\n",
    "\n",
    "\n",
    "def report(issues, show_urls=False):\n",
    "    \"\"\"Summary report about a list of issues, printing number and title.\n",
    "\n",
    "    With show_urls, each entry is prefixed with a :ghpull: or :ghissue:\n",
    "    role, depending on whether the item has a `merged_at` key.\n",
    "    \"\"\"\n",
    "    for i in issues:\n",
    "        title = i['title']\n",
    "        if sys.version_info[0] < 3:\n",
    "            # Python 2 only: titles may be unicode, so encode before formatting.\n",
    "            # On Python 3 encoding would print a bytes repr (b'...').\n",
    "            title = title.encode('utf-8')\n",
    "        if show_urls:\n",
    "            role = 'ghpull' if 'merged_at' in i else 'ghissue'\n",
    "            print('* :%s:`%d`: %s' % (role, i['number'], title))\n",
    "        else:\n",
    "            print('* %d: %s' % (i['number'], title))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "all_issues = {}\n",
    "all_pulls = {}\n",
    "total_commits = 0\n",
    "#prj='pysal/libpysal'\n",
    "prj = 'spatialucr/{package}'.format(package=package_name)\n",
    "issues = issues_closed_since(since, project=prj,pulls=False)\n",
    "pulls = issues_closed_since(since, project=prj,pulls=True)\n",
    "issues = sorted_by_field(issues, reverse=True)\n",
    "pulls = sorted_by_field(pulls, reverse=True)\n",
    "n_issues, n_pulls = map(len, (issues, pulls))\n",
    "n_total = n_issues + n_pulls\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "issue_listing = [\"{title} (#{number})\".format(title=issue['title'],number=issue['number'])\n",
    "                 for issue in issues]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pull_listing = [\"{title} (#{number})\".format(title=pull['title'],number=pull['number'])\n",
    "                for pull in pulls]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pull_listing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "message = \"We closed a total of {total} issues (enhancements and bug fixes) through {pr} pull requests\".format(total=n_total, pr=n_pulls)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "message = \"{msg}, since our last release on {previous}.\".format(msg=message, previous=str(start_date))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "message"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "message += \"\\n\\n## Issues Closed\\n\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# New names on purpose: rebinding `issues` / `pulls` to strings would break a\n",
    "# re-run of the listing cells above, which iterate over the original lists.\n",
    "issues_md = \"\\n\".join([\" - \"+issue for issue in issue_listing])\n",
    "message += issues_md\n",
    "message += \"\\n\\n## Pull Requests\\n\"\n",
    "pulls_md = \"\\n\".join([\" - \"+pull for pull in pull_listing])\n",
    "message += pulls_md"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "people = \"\\n\".join([\" - \"+person for person in unique_authors])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(people)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "message +=\"\\n\\nThe following individuals contributed to this release:\\n\\n{people}\".format(people=people)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "head = \"# Changes\\n\\nVersion {version} ({release_date})\\n\\n\".format(version=__version__, release_date=release_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(head+message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outfile = 'changelog.md'\n",
    "# Issue and PR titles may contain non-ASCII characters, so the encoding is fixed\n",
    "# rather than left to the platform default (Python 3 `open` signature).\n",
    "with open(outfile, 'w', encoding='utf-8') as of:\n",
    "    of.write(head+message)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:mapclassify]",
   "language": "python",
   "name": "conda-env-mapclassify-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}