{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Counting lines of code in a git repo.\n",
    "\n",
    "Step back through a project's git history and, at each stop, total the significant (non-blank, non-comment) lines found in the code cells of its notebooks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import importnb\n",
    "from pathlib import Path\n",
    "from pandas import *\n",
    "import mimetypes\n",
    "from toolz.curried import *\n",
    "from poser import *\n",
    "# NOTE(review): duplicate of the toolz import above. It is kept because the order of\n",
    "# star imports decides which names win - confirm whether poser overrides any of them.\n",
    "from toolz.curried import *\n",
    "import json\n",
    "from pandas import concat as Concat\n",
    "from IPython.utils.capture import capture_output\n",
    "from IPython import get_ipython"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Register the extensions this notebook cares about so mimetypes.guess_type recognises them.\n",
    "mimetypes.add_type('application/x-ipynb+json', '.ipynb')\n",
    "mimetypes.add_type('text/markdown', '.md')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def files_by_type(root=None):\n",
    "    \"\"\"Group the files below a directory by their guessed mime type.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    root : path-like, optional\n",
    "        Directory that is searched recursively. When omitted, the directory two\n",
    "        levels above importnb.__file__ is used, which is what this function\n",
    "        scanned before the parameter existed.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    DataFrame\n",
    "        Indexed by mime type; each row holds the unique paths of that type\n",
    "        spread over the columns.\n",
    "    \"\"\"\n",
    "    root = Path(importnb.__file__).parent.parent if root is None else Path(root)\n",
    "    contents = list(filter(\n",
    "        compose(all, juxt(\n",
    "            Path.is_file,\n",
    "            # leave out anything whose path mentions a byte-code cache or a checkpoint\n",
    "            lambda x: all(\n",
    "                s not in str(x) for s in ('__pycache__', 'checkpoint')\n",
    "            ))),\n",
    "        root.rglob('*')))\n",
    "    return Series(\n",
    "        pipe(contents, map(compose(first, mimetypes.guess_type, str))),\n",
    "        index=contents, name='mime'\n",
    "    ).to_frame().reset_index().set_index('mime').pipe(\n",
    "        lambda df: df.groupby(df.index).agg(compose(list, unique))\n",
    "    )['index'].apply(Series)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def count_lines(str):\n",
    "    \"\"\"Classify every line of a block of source text.\n",
    "\n",
    "    A line counts as 'empty' when it is blank or whitespace only, as 'comment'\n",
    "    when its first non-whitespace character is '#', and as 'significant'\n",
    "    otherwise.\n",
    "\n",
    "    Returns a Series with the keys 'significant', 'empty' and 'comment'.\n",
    "    \"\"\"\n",
    "    # NOTE: the parameter shadows the builtin str inside this function; the name\n",
    "    # is kept so that existing callers are unaffected.\n",
    "    significant, empty, comment = 0, 0, 0\n",
    "    for line in str.splitlines():\n",
    "        if line.strip():\n",
    "            if line.lstrip().startswith('#'):\n",
    "                comment += 1\n",
    "            else:\n",
    "                significant += 1\n",
    "        else:\n",
    "            empty += 1\n",
    "    return Series(dict(zip(('significant', 'empty', 'comment'), (significant, empty, comment))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _cell_source(cell):\n",
    "    \"\"\"Return the source of a notebook cell as one string.\n",
    "\n",
    "    The notebook format allows 'source' to be a single string or a list of\n",
    "    strings. A string is returned untouched (joining it would put a newline\n",
    "    between every character); a list is joined with newlines.\n",
    "    \"\"\"\n",
    "    source = cell.get('source')\n",
    "    return source if isinstance(source, str) else '\\n'.join(source)\n",
    "\n",
    "\n",
    "def nb_info(df):\n",
    "    \"\"\"Total the significant code lines of every notebook listed in df.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : DataFrame\n",
    "        Frame indexed by mime type whose cells are paths, as produced by\n",
    "        files_by_type. Only the 'application/x-ipynb+json' row is read.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    Series\n",
    "        Indexed by notebook path; the value is the sum of the 'significant'\n",
    "        counts that count_lines reports for the notebook's code cells. An empty\n",
    "        Series is returned when df lists no notebooks.\n",
    "    \"\"\"\n",
    "    mime = 'application/x-ipynb+json'\n",
    "    if mime not in df.index:\n",
    "        # a revision without notebooks has nothing to count\n",
    "        return Series(dtype='float64')\n",
    "    paths = df.loc[mime].dropna()\n",
    "    notebooks = paths.apply(compose(\n",
    "        # notebooks are stored as UTF-8; do not depend on the platform default encoding\n",
    "        Series, get('cells'), json.loads, lambda path: path.read_text(encoding='utf-8')\n",
    "    )).set_index(paths.values).stack().apply(\n",
    "        lambda x: Series({'cell_type': x['cell_type'], 'source': _cell_source(x)})\n",
    "    ).set_index('cell_type', append=True).reorder_levels((2,0,1))\n",
    "    # the index is now (cell_type, path, cell number)\n",
    "    notebooks = notebooks.reset_index(0, drop=True).join(\n",
    "        notebooks.loc['code']['source'].apply(count_lines)\n",
    "    ).set_index(notebooks.index).unstack(-1)\n",
    "    return notebooks['significant'].sum(axis=1).unstack(0).fillna(0)['code']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def iterate_over_the_project(project, max=10, iter=20, **loc):\n",
    "    \"\"\"Sample the notebook line counts while stepping back through git history.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    project : tuple\n",
    "        (repo, directory): the address handed to git clone and the local\n",
    "        directory of the checkout. The clone is assumed to land in that\n",
    "        directory.\n",
    "    max : int\n",
    "        Number of snapshots to take.\n",
    "    iter : int\n",
    "        Number of commits to step back between two snapshots.\n",
    "    **loc\n",
    "        Entries the result is seeded with.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        Maps the commit date of each snapshot to the Series returned by nb_info.\n",
    "    \"\"\"\n",
    "    repo, project = project\n",
    "    if not Path(project).exists():\n",
    "        !git clone $repo\n",
    "    !cd $project && git pull\n",
    "    for i in range(max):\n",
    "        with capture_output() as time:\n",
    "            !cd $project && git log -1 --format=%cd --date=local\n",
    "        # Scan the checkout that is being rewound, and drop the newline that\n",
    "        # follows the captured date before parsing it.\n",
    "        loc[to_datetime(time.stdout.strip())] = nb_info(files_by_type(project))\n",
    "        !cd $project && git reset --hard HEAD~$iter\n",
    "    # bring the checkout back up to date after the resets\n",
    "    !cd $project && git pull\n",
    "    return loc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _test_with_importnb():\n",
    "    \"\"\"Smoke test: walking the importnb repository yields a non-empty result.\"\"\"\n",
    "    assert iterate_over_the_project(('https://github.com/deathbeds/importnb', 'importnb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if __name__ == '__main__':\n",
    "    %matplotlib inline\n",
    "    # loc has to be computed here; nothing else in the notebook defines it\n",
    "    loc = iterate_over_the_project(('https://github.com/deathbeds/importnb', 'importnb'))\n",
    "    Concat(loc).unstack(-1).fillna(0).sum(axis=1).plot()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}