{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
additionsdeletionsfiletimestamp
0190src/main/java/org/springframework/samples/petc...2017-12-31 19:41:29
1550src/main/java/org/springframework/samples/petc...2017-12-30 12:48:20
2290src/main/java/org/springframework/samples/petc...2017-12-30 12:48:20
3099src/main/java/org/springframework/samples/petc...2017-12-30 00:38:54
4190src/main/java/org/springframework/samples/petc...2017-12-30 00:38:54
\n", "
" ], "text/plain": [ " additions deletions file \\\n", "0 19 0 src/main/java/org/springframework/samples/petc... \n", "1 55 0 src/main/java/org/springframework/samples/petc... \n", "2 29 0 src/main/java/org/springframework/samples/petc... \n", "3 0 99 src/main/java/org/springframework/samples/petc... \n", "4 19 0 src/main/java/org/springframework/samples/petc... \n", "\n", " timestamp \n", "0 2017-12-31 19:41:29 \n", "1 2017-12-30 12:48:20 \n", "2 2017-12-30 12:48:20 \n", "3 2017-12-30 00:38:54 \n", "4 2017-12-30 00:38:54 " ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "# Location of the exported git log, named once so it is easy to repoint.\n", "GIT_LOG_PATH = \"datasets/git_log_refactoring.gz\"\n", "\n", "# One row per changed file per commit: additions, deletions, file, timestamp.\n", "log = pd.read_csv(GIT_LOG_PATH)\n", "log.head()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 31487 entries, 0 to 31486\n", "Data columns (total 4 columns):\n", "additions 31487 non-null int64\n", "deletions 31487 non-null int64\n", "file 31487 non-null object\n", "timestamp 31487 non-null object\n", "dtypes: int64(2), object(2)\n", "memory usage: 984.0+ KB\n" ] } ], "source": [ "# Summarise column dtypes and non-null counts of the freshly loaded frame.\n", "log.info()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
additionsdeletionsfiletimestamp
02500src/main/java/org/springframework/samples/petc...2013-05-15 03:35:33
1090src/main/java/org/springframework/samples/petc...2013-05-15 17:36:46
2930src/main/java/org/springframework/samples/petc...2013-05-15 17:36:46
3390src/main/java/org/springframework/samples/petc...2013-05-15 17:36:46
40139src/main/java/org/springframework/samples/petc...2013-05-15 17:36:46
\n", "
" ], "text/plain": [ " additions deletions file \\\n", "0 250 0 src/main/java/org/springframework/samples/petc... \n", "1 0 90 src/main/java/org/springframework/samples/petc... \n", "2 93 0 src/main/java/org/springframework/samples/petc... \n", "3 39 0 src/main/java/org/springframework/samples/petc... \n", "4 0 139 src/main/java/org/springframework/samples/petc... \n", "\n", " timestamp \n", "0 2013-05-15 03:35:33 \n", "1 2013-05-15 17:36:46 \n", "2 2013-05-15 17:36:46 \n", "3 2013-05-15 17:36:46 \n", "4 2013-05-15 17:36:46 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Parse the timestamp strings into datetimes, then order the history\n", "# chronologically (oldest first) with a fresh 0..n-1 index.\n", "log = (\n", "    log.assign(timestamp=pd.to_datetime(log['timestamp']))\n", "    .sort_values(by='timestamp')\n", "    .reset_index(drop=True)\n", ")\n", "log.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
additionsdeletionsfiletimestamptype
02500src/main/java/org/springframework/samples/petc...2013-05-15 03:35:33jdbc
24500src/main/java/org/springframework/samples/petc...2013-05-16 02:15:44jdbc
55780src/main/java/org/springframework/samples/petc...2013-05-17 15:12:26jdbc
1861420src/main/java/org/springframework/samples/petc...2013-05-24 05:52:31jdbc
2431230src/main/java/org/springframework/samples/petc...2013-05-28 08:15:35jdbc
\n", "
" ], "text/plain": [ " additions deletions file \\\n", "0 250 0 src/main/java/org/springframework/samples/petc... \n", "24 50 0 src/main/java/org/springframework/samples/petc... \n", "55 78 0 src/main/java/org/springframework/samples/petc... \n", "186 142 0 src/main/java/org/springframework/samples/petc... \n", "243 123 0 src/main/java/org/springframework/samples/petc... \n", "\n", " timestamp type \n", "0 2013-05-15 03:35:33 jdbc \n", "24 2013-05-16 02:15:44 jdbc \n", "55 2013-05-17 15:12:26 jdbc \n", "186 2013-05-24 05:52:31 jdbc \n", "243 2013-05-28 08:15:35 jdbc " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Label each row by the directory marker found in its file path. The pairs are\n", "# applied in order, so a path matching both markers ends up labelled 'jpa'.\n", "for path_marker, technology in ((\"/jdbc/\", 'jdbc'), (\"/jpa/\", 'jpa')):\n", "    log.loc[log['file'].str.contains(path_marker), 'type'] = technology\n", "\n", "# Rows that matched neither marker have no 'type' and are discarded.\n", "log = log.dropna(subset=['type'])\n", "log.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
additionsdeletionsfiletimestamptypelines
02500src/main/java/org/springframework/samples/petc...2013-05-15 03:35:33jdbc250
24500src/main/java/org/springframework/samples/petc...2013-05-16 02:15:44jdbc50
55780src/main/java/org/springframework/samples/petc...2013-05-17 15:12:26jdbc78
1861420src/main/java/org/springframework/samples/petc...2013-05-24 05:52:31jdbc142
2431230src/main/java/org/springframework/samples/petc...2013-05-28 08:15:35jdbc123
\n", "
" ], "text/plain": [ " additions deletions file \\\n", "0 250 0 src/main/java/org/springframework/samples/petc... \n", "24 50 0 src/main/java/org/springframework/samples/petc... \n", "55 78 0 src/main/java/org/springframework/samples/petc... \n", "186 142 0 src/main/java/org/springframework/samples/petc... \n", "243 123 0 src/main/java/org/springframework/samples/petc... \n", "\n", " timestamp type lines \n", "0 2013-05-15 03:35:33 jdbc 250 \n", "24 2013-05-16 02:15:44 jdbc 50 \n", "55 2013-05-17 15:12:26 jdbc 78 \n", "186 2013-05-24 05:52:31 jdbc 142 \n", "243 2013-05-28 08:15:35 jdbc 123 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Net change per row: lines added minus lines deleted.\n", "log = log.assign(lines=log['additions'].sub(log['deletions']))\n", "log.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "timestamp type\n", "2013-05-15 03:35:33 jdbc 250\n", "2013-05-16 02:15:44 jdbc 50\n", "2013-05-17 15:12:26 jdbc 78\n", "2013-05-24 05:52:31 jdbc 142\n", "2013-05-28 08:15:35 jdbc 123\n", "Name: lines, dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total net line change for every (timestamp, type) combination.\n", "log_timed = log.groupby(['timestamp', 'type'])['lines'].sum()\n", "log_timed.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typejdbcjpa
timestamp
2013-05-15 03:35:332500
2013-05-16 02:15:443000
2013-05-17 15:12:263780
2013-05-24 05:52:315200
2013-05-28 08:15:356430
\n", "
" ], "text/plain": [ "type jdbc jpa\n", "timestamp \n", "2013-05-15 03:35:33 250 0\n", "2013-05-16 02:15:44 300 0\n", "2013-05-17 15:12:26 378 0\n", "2013-05-24 05:52:31 520 0\n", "2013-05-28 08:15:35 643 0" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "log_progess = log_timed.unstack(fill_value=0).cumsum()\n", "log_progess.head()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\r\n", "\r\n", "\r\n", "\r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", 
" \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " 
\r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", "\r\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "from IPython.display import set_matplotlib_formats\n", "set_matplotlib_formats('svg')\n", "ax = log_progess.plot()\n", "ax.set_title(\"Reengineering of the database access technology\")\n", "ax.set_xlabel(\"time\")\n", "ax.set_ylabel(\"changes\");" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typejdbcjpa
year
201359400
2014129970
201574242655
201636144769
201733206761
\n", "
" ], "text/plain": [ "type jdbc jpa\n", "year \n", "2013 5940 0\n", "2014 12997 0\n", "2015 7424 2655\n", "2016 3614 4769\n", "2017 3320 6761" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep the last cumulative value recorded in each calendar year.\n", "progress_per_year = log_progess.groupby(log_progess.index.year).last()\n", "progress_per_year.index.name = \"year\"\n", "progress_per_year" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Save through the Figure that owns `ax` (created in the plotting cell).\n", "# Calling plt.savefig() here instead would write an empty image: with the\n", "# inline backend the plotted figure is no longer pyplot's current figure once\n", "# its cell has finished, so pyplot would create and save a new blank one.\n", "ax.get_figure().savefig(\"reengineering.svg\", format=\"svg\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }