{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## mapclassify Change Log Statistics\n",
    "\n",
    "This notebook generates the summary statistics for a package. \n",
    "\n",
    "It assumes you are running this under the `tools` directory at the toplevel of the package\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  get date of last tag\n",
    "from subprocess import Popen, PIPE\n",
    "x, err = Popen('git log -1 --tags --simplify-by-decoration --pretty=\"%ai\"| cat', stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True).communicate()\n",
    "start_date = x.split()[0].decode('utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  today's date\n",
    "import datetime\n",
    "release_date = str(datetime.datetime.today()).split()[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "package_name = 'mapclassify'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook will generate a file in the current directory with the name \"changelog_VERSION.md\". You can edit and append this on front of the CHANGELOG file for the package release."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "import os\n",
    "import json\n",
    "import re\n",
    "import sys\n",
    "import pandas\n",
    "\n",
    "from datetime import datetime, timedelta\n",
    "from time import sleep\n",
    "from subprocess import check_output\n",
    "try:\n",
    "    from urllib import urlopen\n",
    "except:\n",
    "    from urllib.request import urlopen\n",
    "\n",
    "import ssl\n",
    "import yaml\n",
    "\n",
    "context = ssl._create_unverified_context()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "CWD = os.path.abspath(os.path.curdir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "CWD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "since_date = '--since=\"{start}\"'.format(start=start_date)\n",
    "since_date\n",
    "since = datetime.strptime(start_date+\" 0:0:0\", \"%Y-%m-%d %H:%M:%S\")\n",
    "since"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get __version__\n",
    "f = \"../{package}/__init__.py\".format(package=package_name)\n",
    "\n",
    "with open(f, 'r') as initfile:\n",
    "     exec(initfile.readline())\n",
    "   "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Total commits by subpackage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cmd = ['git', 'log', '--oneline', since_date]\n",
    "ncommits = len(check_output(cmd).splitlines())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ncommits"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## List Contributors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Some of our contributors have many aliases for the same identity. So, we've added a mapping to make sure that individuals are listed once (and only once). "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),\n",
    "              'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),\n",
    "              'Wei Kang': ('Wei Kang', 'weikang9009'),\n",
    "              'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas')\n",
    "}\n",
    "\n",
    "def regularize_identity(string):\n",
    "    string = string.decode()\n",
    "    for name, aliases in identities.items():\n",
    "        for alias in aliases:\n",
    "            if alias in string:\n",
    "                string = string.replace(alias, name)\n",
    "    if len(string.split(' '))>1:\n",
    "        string = string.title()\n",
    "    return string.lstrip('* ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "author_cmd = ['git', 'log', '--format=* %aN', since_date]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "ncommits = len(check_output(cmd).splitlines())\n",
    "all_authors = check_output(author_cmd).splitlines()\n",
    "counter = Counter([regularize_identity(author) for author in all_authors])\n",
    "#        global_counter += counter\n",
    "#        counters.update({'.'.join((package,subpackage)): counter})\n",
    "unique_authors = sorted(set(all_authors))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "unique_authors = counter.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "unique_authors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Disaggregate by PR, Issue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime, timedelta\n",
    "ISO8601 = \"%Y-%m-%dT%H:%M:%SZ\"\n",
    "PER_PAGE = 100\n",
    "element_pat = re.compile(r'<(.+?)>')\n",
    "rel_pat = re.compile(r'rel=[\\'\"](\\w+)[\\'\"]')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def parse_link_header(headers):\n",
    "    link_s = headers.get('link', '')\n",
    "    urls = element_pat.findall(link_s)\n",
    "    rels = rel_pat.findall(link_s)\n",
    "    d = {}\n",
    "    for rel,url in zip(rels, urls):\n",
    "        d[rel] = url\n",
    "    return d\n",
    "\n",
    "def get_paged_request(url):\n",
    "    \"\"\"get a full list, handling APIv3's paging\"\"\"\n",
    "    results = []\n",
    "    while url:\n",
    "        #print(\"fetching %s\" % url, file=sys.stderr)\n",
    "        f = urlopen(url)\n",
    "        results.extend(json.load(f))\n",
    "        links = parse_link_header(f.headers)\n",
    "        url = links.get('next')\n",
    "    return results\n",
    "\n",
    "def get_issues(project=\"pysal/pysal\", state=\"closed\", pulls=False):\n",
    "    \"\"\"Get a list of the issues from the Github API.\"\"\"\n",
    "    which = 'pulls' if pulls else 'issues'\n",
    "    url = \"https://api.github.com/repos/%s/%s?state=%s&per_page=%i\" % (project, which, state, PER_PAGE)\n",
    "    return get_paged_request(url)\n",
    "\n",
    "\n",
    "def _parse_datetime(s):\n",
    "    \"\"\"Parse dates in the format returned by the Github API.\"\"\"\n",
    "    if s:\n",
    "        return datetime.strptime(s, ISO8601)\n",
    "    else:\n",
    "        return datetime.fromtimestamp(0)\n",
    "\n",
    "\n",
    "def issues2dict(issues):\n",
    "    \"\"\"Convert a list of issues to a dict, keyed by issue number.\"\"\"\n",
    "    idict = {}\n",
    "    for i in issues:\n",
    "        idict[i['number']] = i\n",
    "    return idict\n",
    "\n",
    "\n",
    "def is_pull_request(issue):\n",
    "    \"\"\"Return True if the given issue is a pull request.\"\"\"\n",
    "    return 'pull_request_url' in issue\n",
    "\n",
    "\n",
    "def issues_closed_since(period=timedelta(days=365), project=\"pysal/pysal\", pulls=False):\n",
    "    \"\"\"Get all issues closed since a particular point in time. period\n",
    "can either be a datetime object, or a timedelta object. In the\n",
    "latter case, it is used as a time before the present.\"\"\"\n",
    "\n",
    "    which = 'pulls' if pulls else 'issues'\n",
    "\n",
    "    if isinstance(period, timedelta):\n",
    "        period = datetime.now() - period\n",
    "    url = \"https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i\" % (project, which, period.strftime(ISO8601), PER_PAGE)\n",
    "    allclosed = get_paged_request(url)\n",
    "    # allclosed = get_issues(project=project, state='closed', pulls=pulls, since=period)\n",
    "    filtered = [i for i in allclosed if _parse_datetime(i['closed_at']) > period]\n",
    "\n",
    "    # exclude rejected PRs\n",
    "    if pulls:\n",
    "        filtered = [ pr for pr in filtered if pr['merged_at'] ]\n",
    "\n",
    "    return filtered\n",
    "\n",
    "\n",
    "def sorted_by_field(issues, field='closed_at', reverse=False):\n",
    "    \"\"\"Return a list of issues sorted by closing date date.\"\"\"\n",
    "    return sorted(issues, key = lambda i:i[field], reverse=reverse)\n",
    "\n",
    "\n",
    "def report(issues, show_urls=False):\n",
    "    \"\"\"Summary report about a list of issues, printing number and title.\n",
    "    \"\"\"\n",
    "    # titles may have unicode in them, so we must encode everything below\n",
    "    if show_urls:\n",
    "        for i in issues:\n",
    "            role = 'ghpull' if 'merged_at' in i else 'ghissue'\n",
    "            print('* :%s:`%d`: %s' % (role, i['number'],\n",
    "                                        i['title'].encode('utf-8')))\n",
    "    else:\n",
    "        for i in issues:\n",
    "            print('* %d: %s' % (i['number'], i['title'].encode('utf-8')))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "all_issues = {}\n",
    "all_pulls = {}\n",
    "total_commits = 0\n",
    "#prj='pysal/libpysal'\n",
    "prj = 'spatialucr/{package}'.format(package=package_name)\n",
    "issues = issues_closed_since(since, project=prj,pulls=False)\n",
    "pulls = issues_closed_since(since, project=prj,pulls=True)\n",
    "issues = sorted_by_field(issues, reverse=True)\n",
    "pulls = sorted_by_field(pulls, reverse=True)\n",
    "n_issues, n_pulls = map(len, (issues, pulls))\n",
    "n_total = n_issues + n_pulls\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "issue_listing = []\n",
    "for issue in issues:\n",
    "    entry = \"{title} (#{number})\".format(title=issue['title'],number=issue['number'])\n",
    "    issue_listing.append(entry)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pull_listing = []\n",
    "for pull in pulls:\n",
    "    entry = \"{title} (#{number})\".format(title=pull['title'],number=pull['number'])\n",
    "    pull_listing.append(entry)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pull_listing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "message = \"We closed a total of {total} issues (enhancements and bug fixes) through {pr} pull requests\".format(total=n_total, pr=n_pulls)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "message = \"{msg}, since our last release on {previous}.\".format(msg=message, previous=str(start_date))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "message"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "message += \"\\n\\n## Issues Closed\\n\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "issues = \"\\n\".join([\"  - \"+issue for issue in issue_listing])\n",
    "message += issues\n",
    "message += \"\\n\\n## Pull Requests\\n\"\n",
    "pulls = \"\\n\".join([\"  - \"+pull for pull in pull_listing])\n",
    "message += pulls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "people = \"\\n\".join([\"  - \"+person for person in unique_authors])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(people)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "message +=\"\\n\\nThe following individuals contributed to this release:\\n\\n{people}\".format(people=people)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "head = \"# Changes\\n\\nVersion {version} ({release_date})\\n\\n\".format(version=__version__, release_date=release_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(head+message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outfile = 'changelog.md'\n",
    "with open(outfile, 'w') as of:\n",
    "    of.write(head+message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:mapclassify]",
   "language": "python",
   "name": "conda-env-mapclassify-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}