{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" additions | \n",
" deletions | \n",
" file | \n",
" timestamp | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 19 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2017-12-31 19:41:29 | \n",
"
\n",
" \n",
" 1 | \n",
" 55 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2017-12-30 12:48:20 | \n",
"
\n",
" \n",
" 2 | \n",
" 29 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2017-12-30 12:48:20 | \n",
"
\n",
" \n",
" 3 | \n",
" 0 | \n",
" 99 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2017-12-30 00:38:54 | \n",
"
\n",
" \n",
" 4 | \n",
" 19 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2017-12-30 00:38:54 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" additions deletions file \\\n",
"0 19 0 src/main/java/org/springframework/samples/petc... \n",
"1 55 0 src/main/java/org/springframework/samples/petc... \n",
"2 29 0 src/main/java/org/springframework/samples/petc... \n",
"3 0 99 src/main/java/org/springframework/samples/petc... \n",
"4 19 0 src/main/java/org/springframework/samples/petc... \n",
"\n",
" timestamp \n",
"0 2017-12-31 19:41:29 \n",
"1 2017-12-30 12:48:20 \n",
"2 2017-12-30 12:48:20 \n",
"3 2017-12-30 00:38:54 \n",
"4 2017-12-30 00:38:54 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"log = pd.read_csv(\"datasets/git_log_refactoring.gz\")\n",
"log.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 31487 entries, 0 to 31486\n",
"Data columns (total 4 columns):\n",
"additions 31487 non-null int64\n",
"deletions 31487 non-null int64\n",
"file 31487 non-null object\n",
"timestamp 31487 non-null object\n",
"dtypes: int64(2), object(2)\n",
"memory usage: 984.0+ KB\n"
]
}
],
"source": [
"log.info()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" additions | \n",
" deletions | \n",
" file | \n",
" timestamp | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 250 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-15 03:35:33 | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" 90 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-15 17:36:46 | \n",
"
\n",
" \n",
" 2 | \n",
" 93 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-15 17:36:46 | \n",
"
\n",
" \n",
" 3 | \n",
" 39 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-15 17:36:46 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 139 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-15 17:36:46 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" additions deletions file \\\n",
"0 250 0 src/main/java/org/springframework/samples/petc... \n",
"1 0 90 src/main/java/org/springframework/samples/petc... \n",
"2 93 0 src/main/java/org/springframework/samples/petc... \n",
"3 39 0 src/main/java/org/springframework/samples/petc... \n",
"4 0 139 src/main/java/org/springframework/samples/petc... \n",
"\n",
" timestamp \n",
"0 2013-05-15 03:35:33 \n",
"1 2013-05-15 17:36:46 \n",
"2 2013-05-15 17:36:46 \n",
"3 2013-05-15 17:36:46 \n",
"4 2013-05-15 17:36:46 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"log['timestamp'] = pd.to_datetime(log['timestamp'])\n",
"log = log.sort_values(by='timestamp').reset_index(drop=True)\n",
"log.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" additions | \n",
" deletions | \n",
" file | \n",
" timestamp | \n",
" type | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 250 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-15 03:35:33 | \n",
" jdbc | \n",
"
\n",
" \n",
" 24 | \n",
" 50 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-16 02:15:44 | \n",
" jdbc | \n",
"
\n",
" \n",
" 55 | \n",
" 78 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-17 15:12:26 | \n",
" jdbc | \n",
"
\n",
" \n",
" 186 | \n",
" 142 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-24 05:52:31 | \n",
" jdbc | \n",
"
\n",
" \n",
" 243 | \n",
" 123 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-28 08:15:35 | \n",
" jdbc | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" additions deletions file \\\n",
"0 250 0 src/main/java/org/springframework/samples/petc... \n",
"24 50 0 src/main/java/org/springframework/samples/petc... \n",
"55 78 0 src/main/java/org/springframework/samples/petc... \n",
"186 142 0 src/main/java/org/springframework/samples/petc... \n",
"243 123 0 src/main/java/org/springframework/samples/petc... \n",
"\n",
" timestamp type \n",
"0 2013-05-15 03:35:33 jdbc \n",
"24 2013-05-16 02:15:44 jdbc \n",
"55 2013-05-17 15:12:26 jdbc \n",
"186 2013-05-24 05:52:31 jdbc \n",
"243 2013-05-28 08:15:35 jdbc "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"log.loc[log['file'].str.contains(\"/jdbc/\"), 'type'] = 'jdbc'\n",
"log.loc[log['file'].str.contains(\"/jpa/\"), 'type'] = 'jpa'\n",
"log = log.dropna(subset=['type'])\n",
"log.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" additions | \n",
" deletions | \n",
" file | \n",
" timestamp | \n",
" type | \n",
" lines | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 250 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-15 03:35:33 | \n",
" jdbc | \n",
" 250 | \n",
"
\n",
" \n",
" 24 | \n",
" 50 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-16 02:15:44 | \n",
" jdbc | \n",
" 50 | \n",
"
\n",
" \n",
" 55 | \n",
" 78 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-17 15:12:26 | \n",
" jdbc | \n",
" 78 | \n",
"
\n",
" \n",
" 186 | \n",
" 142 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-24 05:52:31 | \n",
" jdbc | \n",
" 142 | \n",
"
\n",
" \n",
" 243 | \n",
" 123 | \n",
" 0 | \n",
" src/main/java/org/springframework/samples/petc... | \n",
" 2013-05-28 08:15:35 | \n",
" jdbc | \n",
" 123 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" additions deletions file \\\n",
"0 250 0 src/main/java/org/springframework/samples/petc... \n",
"24 50 0 src/main/java/org/springframework/samples/petc... \n",
"55 78 0 src/main/java/org/springframework/samples/petc... \n",
"186 142 0 src/main/java/org/springframework/samples/petc... \n",
"243 123 0 src/main/java/org/springframework/samples/petc... \n",
"\n",
" timestamp type lines \n",
"0 2013-05-15 03:35:33 jdbc 250 \n",
"24 2013-05-16 02:15:44 jdbc 50 \n",
"55 2013-05-17 15:12:26 jdbc 78 \n",
"186 2013-05-24 05:52:31 jdbc 142 \n",
"243 2013-05-28 08:15:35 jdbc 123 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"log['lines'] = log['additions'] - log['deletions']\n",
"log.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"timestamp type\n",
"2013-05-15 03:35:33 jdbc 250\n",
"2013-05-16 02:15:44 jdbc 50\n",
"2013-05-17 15:12:26 jdbc 78\n",
"2013-05-24 05:52:31 jdbc 142\n",
"2013-05-28 08:15:35 jdbc 123\n",
"Name: lines, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"log_timed = log.groupby(['timestamp', 'type']).lines.sum()\n",
"log_timed.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" type | \n",
" jdbc | \n",
" jpa | \n",
"
\n",
" \n",
" timestamp | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-05-15 03:35:33 | \n",
" 250 | \n",
" 0 | \n",
"
\n",
" \n",
" 2013-05-16 02:15:44 | \n",
" 300 | \n",
" 0 | \n",
"
\n",
" \n",
" 2013-05-17 15:12:26 | \n",
" 378 | \n",
" 0 | \n",
"
\n",
" \n",
" 2013-05-24 05:52:31 | \n",
" 520 | \n",
" 0 | \n",
"
\n",
" \n",
" 2013-05-28 08:15:35 | \n",
" 643 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"type jdbc jpa\n",
"timestamp \n",
"2013-05-15 03:35:33 250 0\n",
"2013-05-16 02:15:44 300 0\n",
"2013-05-17 15:12:26 378 0\n",
"2013-05-24 05:52:31 520 0\n",
"2013-05-28 08:15:35 643 0"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"log_progess = log_timed.unstack(fill_value=0).cumsum()\n",
"log_progess.head()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%matplotlib inline\n",
"from IPython.display import set_matplotlib_formats\n",
"set_matplotlib_formats('svg')\n",
"ax = log_progess.plot()\n",
"ax.set_title(\"Reengineering of the database access technology\")\n",
"ax.set_xlabel(\"time\")\n",
"ax.set_ylabel(\"changes\");"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" type | \n",
" jdbc | \n",
" jpa | \n",
"
\n",
" \n",
" year | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013 | \n",
" 5940 | \n",
" 0 | \n",
"
\n",
" \n",
" 2014 | \n",
" 12997 | \n",
" 0 | \n",
"
\n",
" \n",
" 2015 | \n",
" 7424 | \n",
" 2655 | \n",
"
\n",
" \n",
" 2016 | \n",
" 3614 | \n",
" 4769 | \n",
"
\n",
" \n",
" 2017 | \n",
" 3320 | \n",
" 6761 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"type jdbc jpa\n",
"year \n",
"2013 5940 0\n",
"2014 12997 0\n",
"2015 7424 2655\n",
"2016 3614 4769\n",
"2017 3320 6761"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"progress_per_year = log_progess.groupby(log_progess.index.year).last()\n",
"progress_per_year.index.name = \"year\"\n",
"progress_per_year"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.savefig(\"reengineering.svg\", format=\"svg\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}