{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 4, "hidden": false, "row": 0, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "import pandas as pd \n", "import plotly.express as px\n", "import pygal as pg\n", "from string import Template\n", "from IPython.core.display import display, HTML\n", "\n", "%load_ext cypher\n", "%config CypherMagic.uri='http://neo4j:neo@localhost:7474/db/data'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "from IPython.display import HTML, Javascript, display\n", "\n", "def configure_d3():\n", " \"\"\"Tell require where to get d3 from in `require(['d3'])`\"\"\"\n", " display(Javascript(\"\"\"\n", " require.config({ \n", " paths: {\n", " lodash: \"https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.15/lodash.min\", \n", " d3: \"https://d3js.org/d3.v4.min\"\n", " }\n", " })\"\"\"))\n", "\n", "\n", "configure_d3()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "base_html = \"\"\"\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "
\n", " {rendered_chart}\n", "
\n", " \n", "\n", "\"\"\"" ] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 4, "height": 4, "hidden": false, "row": 0, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "slide" } }, "source": [ "# Einführung in Software Analytics" ] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 5, "hidden": false, "row": 4, "width": 12 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "source": [ "## Resourcen\n", "\n", "* https://python-graph-gallery.com/\n", "* https://www.data-to-viz.com/" ] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 4, "hidden": false, "row": 0, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "slide" } }, "source": [ "## Vorbereitung der Software Analyse" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 10, "hidden": false, "row": 9, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher \n", "// Modulstruktur der Shopizer-Anwendung\n", "MATCH (module:Project{groupId: \"com.shopizer\"}),\n", " (module)-[:HAS_PARENT]->(parent:Project),\n", " (module)-[:CREATES]->(artifact:Main:Artifact)\n", "RETURN parent.artifactId AS Parent, \n", " module.artifactId AS Module, \n", " artifact.fqn AS Artifact\n", "ORDER BY module.artifactId" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 4, "height": 7, "hidden": false, "row": 9, "width": 4 }, "report_default": 
{ "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Markierung aller Shopizer-Knoten\n", "MATCH (artifact:Main:Artifact{group: \"com.shopizer\"})\n", "SET artifact:Shopizer\n", "WITH artifact\n", "MATCH (artifact)-[:CONTAINS]->(c)\n", "SET c:Shopizer\n", "RETURN artifact.name AS Artifact, \n", " count(DISTINCT c) AS ContentCount\n", "ORDER BY artifact.name" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Übersicht über das System" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "packageHierarchy = %cypher MATCH (p:Package:Shopizer)-[:CONTAINS*]->(t:Type:Shopizer) \\\n", " WITH DISTINCT p.fqn AS packageName \\\n", " MATCH (p:Package{fqn: packageName})-[:CONTAINS]->(child:Shopizer) \\\n", " WHERE (child:Package AND exists((child)-[:CONTAINS*]->(:Type:Shopizer))) OR child:Type \\\n", " WITH p, child, child:Type AS leaf \\\n", " RETURN DISTINCT p.fqn AS Parent_Fqn, p.name AS Parent_Name, child.fqn AS Child_Fqn, child.name AS Child_Name, leaf AS Child_Is_Leaf \\\n", " ORDER BY Parent_Fqn\n", "\n", "package_hierarchy_df = packageHierarchy.get_dataframe()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "text = Template(open('vis/circle-packing/circle-packing-diagram.html', 'r').read().replace(\"\\n\",\"\")).substitute({\n", " 'circle_data': package_hierarchy_df.to_csv(index = False).replace(\"\\r\\n\",\"\\\\n\").replace(\"\\n\",\"\\\\n\"),\n", " 'container': 'type-packing-diagram'\n", "})\n", "\n", "HTML(text)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "* Was sind Probleme dieser Ansicht?" 
] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "* 1-zu-1 Abbildung der Package- und Klassen-Hierarchie\n", " * Abhängigkeiten zwischen Klassen nicht sichtbar\n", " * Passt die Strukturierung zu der Anwendung?\n", " * Technische (Schichten-) Architektur versteckt\n", " * Fachliche Komponenten nicht existent\n", " * Kritische Anwendungsteile nicht erkennbar\n", " \n", "→ Eigentliche Struktur der Anwendung bleibt unsichtbar" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "* Aufkommende Fragen\n", " * Wie ist die Anwendung technisch strukturiert?\n", " * Gibt es Architekturverletzungen (z.B. Schichtverletzungen)?\n", " * Welche technischen Frameworks finden in den einzelnen Schichten Verwendung?\n", " * Wie ist die Anwendung fachlich strukturiert?\n", " * Welche fachlichen Module existieren?\n", " * Wie hängen die einzelnen Fachlichkeiten miteinander zusammen?\n", " * Entsprechen die implementierten Abhängigkeiten der fachlichen Realität?" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "* Aufkommende Fragen?\n", " * Was sind potentielle Gefahrenstellen?\n", " * Welche Klassen sind potentielle Problemquellen?\n", " * Welche Klassen sind besonders groß und haben eine hohe Komplexität?\n", " * Welche Klassen werden besonders häufig geändert?\n", " * Welche Klassen müssen häufig gefixed werden?\n", " * Welche fachlichen Module sind potentielle Problemquellen?\n", " * Welche fachlichen Module werden am häufigsten geändert?\n", " * Welche fachlichen Module werden von welchen Entwicklern gekannt?\n", " * Wie verhalten sich Robert C. Martins \"Package-Metriken\" für die fachlichen Module?" 
] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 4, "hidden": false, "row": 9, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "slide" } }, "source": [ "## Wie ist die Anwendung technisch strukturiert?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "moduleDependencies = %cypher MATCH (a1:Artifact:Shopizer)-[:CONTAINS]->(t1:Type:Shopizer), \\\n", " (a2:Artifact:Shopizer)-[:CONTAINS]->(t2:Type:Shopizer), \\\n", " (t1)-[dep:DEPENDS_ON]->(t2) \\\n", " WHERE a1 <> a2 \\\n", " RETURN a1.name AS Source, \\\n", " a2.name AS Target, \\\n", " COUNT(dep) AS X_Count \\\n", " ORDER BY Source DESC\n", "\n", "moduleDependenciesData = moduleDependencies.get_dataframe().to_csv(index = False).replace(\"\\r\\n\",\"\\\\n\").replace(\"\\n\",\"\\\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "text = Template(open('vis/chord/chord-diagram.html', 'r').read().replace(\"\\n\",\"\")).substitute({\n", " 'chord_data': moduleDependenciesData, \n", " 'container': 'module-chord-diagram'})\n", "\n", "HTML(text)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 4, "hidden": false, "row": 13, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Anlegen von Knoten für technische Schichten\n", "MERGE (pres:Layer{name: 'Presentation'})\n", "MERGE (dom:Layer{name: 'Domain'})\n", "MERGE (dat:Layer{name: 'Data'})" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { 
"grid_default": { "col": 4, "height": 5, "hidden": false, "row": 24, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Zuordnen von Klassen zu den Layern\n", "WITH [\n", " ['Presentation','sm-shop'],\n", " ['Presentation','sm-shop-model'],\n", " ['Domain','sm-core'],\n", " ['Domain','sm-core-modules'],\n", " ['Data','sm-core-model']\n", "] AS layerAssignments\n", "UNWIND layerAssignments AS layerAssignment\n", "MATCH (l:Layer{name: layerAssignment[0]}),\n", " (a:Artifact:Shopizer{name: layerAssignment[1]}),\n", " (a)-[:CONTAINS]->(t:Type)\n", "MERGE (l)-[:CONTAINS]->(t) \n", "RETURN l.name AS Layer, count(DISTINCT t) AS Classes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 8, "hidden": false, "row": 24, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Abhängigkeiten zwischen Schichten\n", "MATCH (l1:Layer)-[:CONTAINS]->(t1:Type),\n", " (l2:Layer)-[:CONTAINS]->(t2:Type),\n", " (t1)-[d:DEPENDS_ON]->(t2)\n", "WITH l1, l2, sum(d.weight) AS weight, count(d) AS count\n", "MERGE (l1)-[d:DEPENDS_ON{weight: weight, count: count}]->(l2)\n", "RETURN l1.name AS Source, d.weight AS Weight, d.count AS Count, l2.name AS Target\n", "ORDER BY Source, Target" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 12, "hidden": false, "row": 32, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Verwendete Frameworks nach Layer\n", "MATCH (l:Layer)-[:CONTAINS]->(:Type)-[:DEPENDS_ON]->(dep:Type)<-[:REQUIRES]-(a:Artifact)\n", "WHERE NOT (\n", " dep.fqn starts 
with \"java.\"\n", " or dep.fqn in [\"void\",\"byte\",\"int\",\"long\",\"double\",\"boolean\", \"char\"]\n", ")\n", "WITH DISTINCT l, split(dep.fqn, \".\") AS dep\n", "WITH dep[0] + \".\" + dep[1] + \".\" + dep[2] AS dep, l\n", "ORDER BY l.name\n", "RETURN dep AS Dependency, collect(DISTINCT l.name) AS Layer\n", "ORDER BY size(Layer) DESC" ] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 4, "hidden": false, "row": 34, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "slide" } }, "source": [ "## Wie ist die Anwendung fachlich strukturiert?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 5, "hidden": false, "row": 38, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Identifikation der Packages unterhalb von \"com.salesmanager.core.business.services\"\n", "MATCH (p:Package:Shopizer)-[:CONTAINS]->(bC:Package:Shopizer)\n", "WHERE p.fqn = \"com.salesmanager.core.business.services\"\n", "WITH bC\n", "ORDER BY bC.name\n", "RETURN collect(bC.name) AS BoundedContexts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 4, "hidden": false, "row": 43, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Anlegen eines Knoten je Fachlichkeit\n", "MATCH (p:Package:Shopizer)-[:CONTAINS]->(bC:Package:Shopizer)\n", "WHERE p.fqn = \"com.salesmanager.core.business.services\"\n", "WITH collect(DISTINCT bC.name) AS boundedContexts\n", "UNWIND boundedContexts AS boundedContext\n", "MERGE (:BoundedContext {name: 
boundedContext})" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 4, "hidden": false, "row": 44, "width": 4 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Zuordnen der Klassen zu den Bounded Contexts\n", "MATCH (bC:BoundedContext),\n", " (p:Package:Shopizer)-[:CONTAINS*]->(t:Type:Shopizer)\n", "WHERE p.name = bC.name\n", "MERGE (bC)-[:CONTAINS]->(t)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "* Aufgabe: Bestimme den Anteil der den Bounded Contexts zugeordneten Klassen am Gesamtsystem Shopizer" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 12, "hidden": false, "row": 107, "width": 5 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Nicht zugeordnete Klassen\n", "MATCH (p:Package)-[:CONTAINS*]->(t:Type:Shopizer)\n", "WHERE NOT EXISTS((:BoundedContext)-[:CONTAINS]->(t))\n", "RETURN p.fqn, count(DISTINCT t) AS Count\n", "ORDER BY Count DESC" ] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 7, "hidden": false, "row": 119, "width": 12 }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "source": [ "* Hinweis: Es ergibt Sinn, die Liste der nicht zugeordneten Klassen durchzugehen und diese manuell über eine zusätzliche Query einem Bounded Context zuzuordnen\n", "\n", "```cypher\n", "WITH [] AS packageMappings\n", "UNWIND packageMappings AS packageMapping\n", "MATCH (bC:BoundedContext{name: packageMapping[0]}),\n", " (p:Package{fqn: 
packageMapping[1]})-[:CONTAINS]->(t:Type:Shopizer)\n", "MERGE (bC)-[:CONTAINS]->(t)\n", "```" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 20, "hidden": false, "row": 48, "width": null }, "report_default": { "hidden": false } } } }, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "subdomainSize = %cypher MATCH (bC:BoundedContext), \\\n", " (p:Package:Shopizer)-[:CONTAINS*]->(t:Type:Shopizer) \\\n", " WHERE p.name = bC.name \\\n", " MERGE (bC)-[:CONTAINS]->(t) \\\n", " RETURN bC.name AS BoundedContext, \\\n", " count(DISTINCT t) AS Classes\n", "\n", "df = subdomainSize.get_dataframe()\n", "fig = px.pie(df, values='Classes', names='BoundedContext', title='Größe der einzelnen Bounded Contexts')\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "# Abhängigkeiten zwischen Bounded Contexts (Domain Layer)\n", "bCRelations = %cypher MATCH (bC1:BoundedContext)-[:CONTAINS]->(t1:Type:Shopizer), \\\n", " (dL:Layer{name:\"Domain\"})-[:CONTAINS]->(t1), \\\n", " (bC2:BoundedContext)-[:CONTAINS]->(t2:Type:Shopizer), \\\n", " (dL)-[:CONTAINS]->(t2), \\\n", " (t1)-[dep:DEPENDS_ON]->(t2) \\\n", " RETURN bC1.name AS Source, \\\n", " bC2.name AS Target, \\\n", " sum(dep.weight) AS X_Count\n", "\n", "bounded_context_connections = bCRelations.get_dataframe()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "text = Template(open('vis/chord/chord-diagram.html', 'r').read().replace(\"\\n\",\"\")).substitute({\n", " 'chord_data': bounded_context_connections.to_csv(index = False).replace(\"\\r\\n\",\"\\\\n\").replace(\"\\n\",\"\\\\n\"), \n", " 'container': 'bc-chord-diagram'})\n", "\n", "HTML(text)" ] }, { "cell_type": "markdown", "metadata": { 
"slideshow": { "slide_type": "slide" } }, "source": [ "## Vorbereitung der Repository-Analysen" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Identifikation von Merge-Commits\n", "MATCH (c:Commit)-[:HAS_PARENT]->(p:Commit)\n", "WITH c, count(p) as parents\n", "WHERE parents > 1\n", "SET c:Merge\n", "RETURN count(c)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Bereinigung von Autor-Duplikaten (nach Name)\n", "MATCH (a:Author)\n", "WITH a.name as name, collect(a) as authors\n", "WITH head(authors) as author, tail(authors) as duplicates\n", "UNWIND duplicates as duplicate\n", "MATCH (duplicate)-[:COMMITTED]->(c:Commit)\n", "MERGE (author)-[:COMMITTED]->(c)\n", "DETACH DELETE duplicate\n", "RETURN author.name, count(duplicate)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Bereinigung von Autor-Duplikaten (nach E-Mail)\n", "MATCH (a:Author)\n", "WITH a.email as email, collect(a) as authors\n", "WITH head(authors) as author, tail(authors) as duplicates\n", "UNWIND duplicates as duplicate\n", "MATCH (duplicate)-[:COMMITTED]->(c:Commit)\n", "MERGE (author)-[:COMMITTED]->(c)\n", "DETACH DELETE duplicate\n", "RETURN author.name, author.email, count(duplicate)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Bereinigung von Autor-Duplikaten (Manuelles Postprocessing)\n", "WITH [\n", " [\"Carl Samson\", \"csamson777@yahoo.com\", \"c.samson@cgi.com\"],\n", " [\"Carl Samson\", \"csamson777@yahoo.com\", \"carlsamson@Carls-MacBook-Pro-2.local\"],\n", " [\"Umesh Awasthi\", \"UAwasthi@rccl.com\", \"umeshawasthi@gmail.com\"]\n", "] AS authors\n", 
"UNWIND authors AS duplicateAuthor\n", "MATCH (author:Author{email: duplicateAuthor[1]}),\n", " (duplicate:Author{email: duplicateAuthor[2]})\n", "SET author.name = duplicateAuthor[0] \n", "WITH author, duplicate\n", "MATCH (duplicate)-[:COMMITTED]->(c:Commit)\n", "MERGE (author)-[:COMMITTED]->(c)\n", "DETACH DELETE duplicate\n", "RETURN author.name, author.email, count(duplicate)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "commitsPerAuthor = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit), \\\n", " (c)-[:CONTAINS_CHANGE]->(:Change)-[:MODIFIES]->(file:File) \\\n", " WHERE NOT c:Merge \\\n", " WITH a, count(DISTINCT c) AS Commits \\\n", " WHERE Commits > 1 \\\n", " RETURN a.name as Entwickler, Commits \\\n", " ORDER BY Commits DESC\n", "\n", "commitsPerAuthor_df = commitsPerAuthor.get_dataframe()\n", "\n", "#Visualisierung\n", "\n", "bar_chart = pg.Bar(show_legend=True, human_readable=True, \n", "fill=True, legend_at_bottom=True, legend_at_bottom_columns=2)\n", "bar_chart.title = 'Entwickler mit den meisten Commits'\n", "for index, row in commitsPerAuthor_df.iterrows():\n", " bar_chart.add(row['Entwickler'],[{\"value\": row['Commits']}])\n", "display(HTML(base_html.format(rendered_chart=bar_chart.render(is_unicode=True))))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Index für Filepath\n", "CREATE INDEX ON :File(relativePath)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Verknüpfen von Git :File und Java :Type\n", "MATCH (p:Package:Shopizer)-[:CONTAINS]->(t:Type:Shopizer)\n", "WITH t, p.fileName + \"/\" + t.sourceFileName as sourceFileName\n", "MATCH (f:Git:File)\n", "WHERE f.relativePath ENDS WITH sourceFileName\n", "MERGE (f)-[:HAS_SOURCE]->(t)" 
] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Welche Klassen sind potentielle Problemquellen?" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "### Welche Klassen sind besonders groß und haben eine hohe Komplexität?" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "* Aufgabe: Bestimmung der größten Klassen\n", "* Hinweis: Die Größe einer Klasse kann mittels 'lastLineNumber' des :Method -Knoten annähernd bestimmt werden" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "%%cypher\n", "// Classes with the highest average cyclomatic complexity per declared method\n", "// Note: [:DECLARES] restricts the match to DECLARES relationships; without the colon the name is a variable that matches any relationship type\n", "MATCH (t:Type:Shopizer)-[:DECLARES]->(m:Method) \n", "WHERE EXISTS(m.cyclomaticComplexity)\n", "WITH t, sum(m.cyclomaticComplexity) / toFloat(count(m)) AS AverageComplexity\n", "RETURN DISTINCT t.fqn AS Type, \n", " AverageComplexity \n", "ORDER BY AverageComplexity DESC" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "* Robert C. 
Martin definiert Komplexitätsmetriken für Packages\n", " * Anwendung kann auch für Klassen oder Module erfolgen (im Folgenden zusammengefasst als Komponente)\n", " \n", " \n", "* Efferent Coupling (Ce)\n", " * Anzahl der ausgehenden Abhängigkeiten (Fan-Out) einer Komponente\n", "* Afferent Coupling (Ca)\n", " * Anzahl der eingehenden Abhängigkeiten (Fan-In) einer Komponente\n", "* Instabilität (I) = Ce / (Ce + Ca)\n", " * Stabilität gegenüber Änderungen an anderen Komponenten (geringer = stabiler)\n", " * Aber: Je stabiler desto schwerer zu ändern da viele abhängige Komponenten\n", "* Abstraktheit (A) = Na / Nc\n", " * Anteil abstrakter (Methoden/Klassen) an Gesamtheit der Komponente\n", "* Distanz (D) = |A + I - 1|\n", " * Abstand vom optimalen Verhältnis zwischen Abstraktheit und Instabilität (höher = schlechter)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "* Zone of Pain\n", " * Stabil (I klein) und Konkret (A klein)\n", " * Änderungen an diesen Komponenten führen zu vielen Änderungen in abhängigen Komponenten\n", "* Zone of Uselessness\n", " * Instabil (I groß) und Abstrakt (A groß)\n", " * Bereitgestellte abstrakte Komponenten finden keine Verwendung" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "instability_query = %cypher MATCH (t:Type:Shopizer) \\\n", " OPTIONAL MATCH (t)-[:DEPENDS_ON]->(d:Type:Shopizer) \\\n", " WITH t, count(d) AS EfferentCoupling \\\n", " OPTIONAL MATCH (e)-[:DEPENDS_ON]->(t) \\\n", " WITH t, EfferentCoupling, count(e) AS AfferentCoupling \\\n", " WHERE EfferentCoupling + AfferentCoupling > 0 \\\n", " RETURN t.fqn AS Type, t.name AS Name, toFloat(EfferentCoupling) / (EfferentCoupling + AfferentCoupling) AS Instability \\\n", " ORDER BY Instability DESC\n", "instability_df = instability_query.get_dataframe()\n", "\n", "abstractness_query = %cypher MATCH 
(t:Type:Shopizer)-[:DECLARES]->(m:Method) \\\n", " WITH t, count(m) AS Total \\\n", " OPTIONAL MATCH (t)-[:DECLARES]->(m:Method{abstract: true}) \\\n", " WITH t, toFloat(count(m)) / Total AS Abstractness \\\n", " RETURN t.fqn AS Type, t.name AS Name, Abstractness \\\n", " ORDER BY Abstractness DESC\n", "abstractness_df = abstractness_query.get_dataframe()\n", "\n", "distance_df = pd.merge(instability_df, abstractness_df, how='outer', on = ['Type','Name'])\n", "distance_df = distance_df.fillna(0)\n", "\n", "distance_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "distance_df['Distance'] = abs(distance_df.Instability + distance_df.Abstractness - 1)\n", "distance_df = distance_df.sort_values('Distance', ascending=False)\n", "\n", "# Entfernen reiner Utility-Klassen (Abstractness == 0, Instability == 0) da Senken\n", "stackedBar_distance_df = distance_df[(distance_df.Abstractness > 0) & (distance_df.Instability > 0)]\n", "stackedBar_distance_df = stackedBar_distance_df[0:30]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "stacked_bar_chart = pg.StackedBar(show_legend=True, human_readable=True, fill=False, x_label_rotation=45, truncate_label=-1)\n", "stacked_bar_chart.title = 'Robert C. 
Martin Metriken'\n", "stacked_bar_chart.x_labels = stackedBar_distance_df['Name'].tolist()\n", "stacked_bar_chart.add('Abstractness', stackedBar_distance_df['Abstractness'].tolist())\n", "stacked_bar_chart.add('Instability', stackedBar_distance_df['Instability'].tolist())\n", "stacked_bar_chart.add('Distance', stackedBar_distance_df['Distance'].tolist())\n", "display(HTML(base_html.format(rendered_chart=stacked_bar_chart.render(is_unicode=True))))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "type_distance_doc = []\n", "for _id in distance_df.T:\n", " data = distance_df.T[_id]\n", " values = {'value': (data.Abstractness, data.Instability), 'label': data.Type}\n", " type_distance_doc.append(values)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "xy_chart = pg.XY(stroke=False, x_title='Abstractness', y_title='Instability')\n", "xy_chart.title = 'Robert C. Martin Distance'\n", "xy_chart.add('Abstractness to Instability', type_distance_doc)\n", "xy_chart.add('Optimum', [(0, 1), (1, 0)], stroke=True)\n", "xy_chart.add('Zone of Pain', [(0, 0.3), (0.3, 0)], stroke=True)\n", "xy_chart.add('Zone of Uselesness', [(1, 0.7), (0.7, 1)], stroke=True)\n", "display(HTML(base_html.format(rendered_chart=xy_chart.render(is_unicode=True))))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "* Aufgabe: Welche Klassen werden besonders häufig:\n", " 1. geändert\n", " 2. gefixed (anhand der Commit-Message)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "### Welche fachlichen Module sind potentielle Problemquellen?" 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "module_instability_query = %cypher MATCH (bC1:BoundedContext) \\\n", " WITH bC1 \\\n", " MATCH (bC1)-[:CONTAINS]->(t:Type:Shopizer)-[:DEPENDS_ON]->(d:Type:Shopizer)<-[:CONTAINS]-(bC2:BoundedContext) \\\n", " WHERE bC1 <> bC2 \\\n", " WITH bC1, count(d) AS EfferentCoupling \\\n", " MATCH (bC1)-[:CONTAINS]->(t:Type:Shopizer)<-[:DEPENDS_ON]-(d:Type:Shopizer)<-[:CONTAINS]-(bC2:BoundedContext) \\\n", " WHERE bC1 <> bC2 \\\n", " WITH bC1, EfferentCoupling, count(d) AS AfferentCoupling \\\n", " WITH bC1, toFloat(EfferentCoupling) / (EfferentCoupling + AfferentCoupling) AS Instability \\\n", " RETURN bC1.name AS Name, Instability \\\n", " ORDER BY Instability DESC\n", "module_instability_df = module_instability_query.get_dataframe()\n", "\n", "module_abstractness_query = %cypher MATCH (bC:BoundedContext)-[:CONTAINS]->(t:Type:Shopizer) \\\n", " WITH bC, count(t) AS Total \\\n", " OPTIONAL MATCH (bC)-[:CONTAINS]->(t:Type:Shopizer) \\\n", " WHERE t:Interface OR exists(t.abstract) \\\n", " WITH bC, toFloat(count(t)) / Total AS Abstractness \\\n", " RETURN bC.name AS Name, Abstractness \\\n", " ORDER BY Abstractness DESC \n", "module_abstractness_df = module_abstractness_query.get_dataframe()\n", "\n", "module_distance_df = pd.merge(module_instability_df, module_abstractness_df, how='outer', on = ['Name'])\n", "module_distance_df = module_distance_df.fillna(0)\n", "\n", "module_distance_doc = []\n", "for _id in module_distance_df.T:\n", " data = module_distance_df.T[_id]\n", " values = {'value': (data.Abstractness, data.Instability), 'label': data.Name}\n", " module_distance_doc.append(values)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "xy_module_chart = pg.XY(stroke=False, x_title='Abstractness', y_title='Instability')\n", 
"xy_module_chart.title = 'Robert C. Martin Distance'\n", "xy_module_chart.add('Abstractness to Instability', module_distance_doc)\n", "xy_module_chart.add('Optimum', [(0, 1), (1, 0)], stroke=True)\n", "xy_module_chart.add('Zone of Pain', [(0, 0.3), (0.3, 0)], stroke=True)\n", "xy_module_chart.add('Zone of Uselesness', [(1, 0.7), (0.7, 1)], stroke=True)\n", "display(HTML(base_html.format(rendered_chart=xy_module_chart.render(is_unicode=True))))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "* Aufgabe: Welche Bounded Contexts sind unter vielen Entwicklern bekannt, und welche eher weniger?" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Eure Fragen an die Anwendung" ] } ], "metadata": { "celltoolbar": "Slideshow", "extensions": { "jupyter_dashboards": { "activeView": "grid_default", "version": 1, "views": { "grid_default": { "cellMargin": 10, "defaultCellHeight": 20, "maxColumns": 12, "name": "grid", "type": "grid" }, "report_default": { "name": "report", "type": "report" } } } }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.10" } }, "nbformat": 4, "nbformat_minor": 4 }