{ "cells": [ { "cell_type": "markdown", "id": "85e3854f", "metadata": {}, "source": [ "# Figures" ] }, { "cell_type": "code", "execution_count": 1, "id": "89b39afa", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import math" ] }, { "cell_type": "code", "execution_count": 2, "id": "51a5b727", "metadata": {}, "outputs": [], "source": [ "def organize_data(data, technique, mappingsEG_TS):\n", " ordered_data = pd.DataFrame(index=mappingsEG_TS)\n", " ordered_data = data[1:]\n", " ordered_data.columns = technique\n", " ordered_data.replace(np.nan, 0)\n", " \n", " for i in range(0, len(ordered_data.index)):\n", " for j in range(0, len(ordered_data.columns)):\n", " new_data = float(ordered_data.iloc[i][j])\n", " ordered_data.iloc[i][j]=np.log10(new_data)\n", "\n", " return ordered_data\n" ] }, { "cell_type": "markdown", "id": "50b5a695", "metadata": {}, "source": [ "## Plots for KGC systems and triplestores" ] }, { "cell_type": "code", "execution_count": 30, "id": "c2aeac70", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1K \n", " std. reif. rdf-star named graphs n-ary rel.\n", "Oxigraph -0.278189 -0.297569 NaN -0.110698\n", "Fuseki -0.860121 -0.935542 -0.899629 -0.617983\n", "GraphDB -0.609065 -0.872895 NaN -0.585027\n", "Morph-KGC 0.15351 0.071882 0.054613 0.090963\n", "SPARQL-Anything 0.158061 0.136721 0.143327 0.137671 \n", "\n", "10K \n", " std. reif. rdf-star named graphs n-ary rel.\n", "Oxigraph 0.612466 0.577607 NaN 1.318919\n", "Fuseki -0.013228 -0.154282 -0.077794 0.714833\n", "GraphDB 0.306854 -0.017729 NaN 0.658584\n", "Morph-KGC 0.366236 0.23955 0.190332 0.243782\n", "SPARQL-Anything 0.560026 0.492062 0.556061 0.544936 \n", "\n", "100K \n", " std. reif. rdf-star named graphs n-ary rel.\n", "Oxigraph 1.599206 1.582484 NaN 1.458759\n", "Fuseki 1.693551 0.898396 0.930643 1.759638\n", "GraphDB 1.3098 0.963646 NaN 2.276942\n", "Morph-KGC 1.089587 0.90961 0.798374 0.906874\n", "SPARQL-Anything 1.345609 1.346451 1.340702 1.368956 \n", "\n", "1M \n", " std. reif. rdf-star named graphs n-ary rel.\n", "Oxigraph 2.67286 2.689963 NaN 2.456125\n", "Fuseki 2.216583 2.11886 2.200517 3.873521\n", "GraphDB 2.31729 1.959771 NaN 3.784211\n", "Morph-KGC 2.085808 1.894737 1.762386 1.88548\n", "SPARQL-Anything 2.370339 2.332184 2.331909 2.377969 \n", "\n" ] } ], "source": [ "scales = ['1K','10K', '100K', '1M']\n", "technique = ['Std. Reif.', 'RDF-star', 'Named Graphs', 'N-Ary Rel.']\n", "mappingsEG_TS = ['Oxigraph', 'Fuseki', 'GraphDB', 'Morph-KGC', 'SPARQL-Anything']\n", "\n", "data_list = {}\n", "\n", "for scale in scales:\n", " file = pd.read_csv(\"data/mq-\" + scale + \".csv\")\n", " file = np.transpose(file)\n", " data_list[scale] = organize_data(file, [x.lower() for x in technique], mappingsEG_TS)\n", " print(scale, '\\n', data_list[scale], '\\n')" ] }, { "cell_type": "code", "execution_count": 35, "id": "b2a187cf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Finished: 'figures/time_1K'\n", "Finished: 'figures/time_10K'\n", "Finished: 'figures/time_100K'\n", "Finished: 'figures/time_1M'\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.style.use('seaborn-whitegrid')\n", "\n", "for scale in data_list:\n", " barWidth = 0.11\n", " r1 = np.arange(len(data_list[scale].columns))\n", " r2 = [x + barWidth for x in r1]\n", " r3 = [x + barWidth * 2 for x in r1]\n", " r4 = [x + barWidth * 3 for x in r1]\n", " r5 = [x + barWidth * 4 for x in r1]\n", " centerticks = [-1.1] * 4\n", "\n", " plt.bar(r1, data_list[scale].values.tolist()[0], width=barWidth, color='#FC9F5B', # F2BABA\n", " label='Oxigraph', edgecolor = \"black\", linewidth=0.5)\n", " plt.bar(r2, data_list[scale].values.tolist()[1], width=barWidth, color='#FBD1A2', # A46593\n", " label='Fuseki', edgecolor = \"black\", linewidth=0.5)\n", " plt.bar(r3, data_list[scale].values.tolist()[2], width=barWidth, color='#ECE4B7', # B1ACAA\n", " label='GraphDB', edgecolor = \"black\", linewidth=0.5)\n", " plt.bar(r4, data_list[scale].values.tolist()[3], width=barWidth, color='#7DCFB6', # B1ACAA\n", " label='Morph-KGC', edgecolor = \"black\", linewidth=0.5)\n", " plt.bar(r5, data_list[scale].values.tolist()[4], width=barWidth, color='#33CA7F', # 2862CC\n", " label='SPARQL-Anything', edgecolor = \"black\", linewidth=0.5)\n", " plt.scatter(r3, centerticks, marker=\"|\", facecolor = \"gray\", linewidths=0.75)\n", "\n", "\n", " plt.xticks([r + barWidth * 1.5 for r in range(len(r1))], map(lambda each: each.strip(\"\"), technique),\n", " fontsize=12)\n", " plt.yticks(np.arange(0, 6), ('0', '1', '2', '3', '4', 'TO'), fontsize=14)\n", " plt.ylim(top=5,bottom=-1.1)\n", " plt.ylabel(\"Time (log$_{10}$(s))\", fontsize=15)\n", " plt.title(scale, fontsize=20)\n", " plt.legend(mappingsEG_TS, loc='lower center', ncol=6, bbox_to_anchor=[0.5, -0.26], fontsize=13)\n", " plt.grid(axis='x')\n", "\n", " plt.savefig(\"./figures/time_\" + scale.lower() + \".png\", bbox_inches='tight', dpi=700)\n", "\n", " plt.clf()\n", " \n", " print(\"Finished: 'figures/time_\" + scale + \"'\")" ] }, { "cell_type": "markdown", "id": "aed8ebc0", "metadata": {}, "source": [ "## Plots for details on queries in triplestores" ] }, { "cell_type": "code", "execution_count": 5, "id": "363497d2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1K \n", " q1 q2 q3 q4 q5 q6 \\\n", "Oxigraph -0.274905 -0.308919 NaN -0.199283 0.047664 NaN \n", "Fuseki -0.853872 -1.004365 -0.876148 -0.876148 -0.477556 -0.886057 \n", "GraphDB -0.632644 -0.882729 NaN -0.882729 -0.560667 NaN \n", "\n", " q7 q8 q9 q10 q11 q12 \n", "Oxigraph -0.238824 -0.317855 NaN -0.321482 -0.38405 -0.06148 \n", "Fuseki -0.823909 -0.737549 -0.931814 -0.906578 -0.917215 -0.636388 \n", "GraphDB -0.595166 -0.596879 NaN -0.598599 -0.847712 -0.595166 \n", "\n", "10K \n", " q1 q2 q3 q4 q5 q6 \\\n", "Oxigraph 0.620344 0.566673 NaN 0.711301 1.742136 NaN \n", "Fuseki -0.03105 -0.181115 -0.002177 -0.120331 1.174525 -0.137272 \n", "GraphDB 0.302547 -0.03292 NaN -0.025028 0.963126 NaN \n", "\n", " q7 q8 q9 q10 q11 q12 \n", "Oxigraph 0.628389 0.498448 NaN 0.58872 0.454997 1.716162 \n", "Fuseki -0.003488 0.000868 -0.093665 -0.004365 -0.161151 0.968996 \n", "GraphDB 0.297979 0.123198 NaN 0.31973 0.004751 0.889246 \n", "\n", "100K \n", " q1 q2 q3 q4 q5 q6 \\\n", "Oxigraph 1.642781 1.594879 NaN 1.683947 NaN NaN \n", "Fuseki 1.091561 0.868762 0.974235 0.918345 3.148766 0.888236 \n", "GraphDB 1.308906 0.955255 NaN 0.951775 2.92639 NaN \n", "\n", " q7 q8 q9 q10 q11 q12 \n", "Oxigraph 1.596597 1.458759 NaN 1.558228 1.468643 NaN \n", "Fuseki 1.039533 1.04708 0.929419 1.083072 0.908163 2.949539 \n", "GraphDB 1.27395 1.074158 NaN 1.346549 0.983852 2.830287 \n", "\n", "1M \n", " q1 q2 q3 q4 q5 q6 q7 \\\n", "Oxigraph 2.782481 2.727531 NaN 2.793882 NaN NaN 2.589086 \n", "Fuseki 2.26766 1.999296 2.112675 2.061189 5.0 2.15412 2.376119 \n", "GraphDB 2.33546 1.963202 NaN 1.932971 5.0 NaN 2.259319 \n", "\n", " q8 q9 q10 q11 q12 \n", "Oxigraph 2.456125 NaN 2.647011 2.548475 NaN \n", "Fuseki 2.349594 2.334753 2.005969 2.296099 5.0 \n", "GraphDB 2.081664 NaN 2.357088 1.983134 5.0 \n", "\n" ] } ], "source": [ "scales = ['1K','10K', '100K', '1M']\n", "technique = ['Std. Reif.', 'RDF-star', 'Named Graphs', 'N-Ary Rel.']\n", "query = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8', 'Q9', 'Q10', 'Q11', 'Q12']\n", "mappingsEG_TS = ['Oxigraph', 'Fuseki', 'GraphDB']\n", "\n", "data_list = {}\n", "\n", "for scale in scales:\n", " file = pd.read_csv(\"data/q-\" + scale + \".csv\", sep='\\t')\n", " file = np.transpose(file)\n", " data_list[scale] = organize_data(file, [x.lower() for x in query], mappingsEG_TS)\n", " print(scale, '\\n', data_list[scale], '\\n')" ] }, { "cell_type": "code", "execution_count": 28, "id": "e7ba0fe3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Finished: 'figures/time_queries_1K'\n", "Finished: 'figures/time_queries_10K'\n", "Finished: 'figures/time_queries_100K'\n", "Finished: 'figures/time_queries_1M'\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.style.use('seaborn-whitegrid')\n", "\n", "for scale in data_list:\n", " barWidth = 0.23\n", " r1 = np.arange(len(data_list[scale].columns))\n", " r2 = [x + barWidth for x in r1]\n", " r3 = [x + barWidth * 2 for x in r1]\n", " centerticks = [-1.1] * 12\n", "\n", " plt.bar(r1, data_list[scale].values.tolist()[0], width=barWidth, color='#FC9F5B', # F2BABA\n", " label='Oxigraph', edgecolor = \"black\", linewidth=0.5)\n", " plt.bar(r2, data_list[scale].values.tolist()[1], width=barWidth, color='#FBD1A2', # A46593\n", " label='Fuseki', edgecolor = \"black\", linewidth=0.5)\n", " plt.bar(r3, data_list[scale].values.tolist()[2], width=barWidth, color='#ECE4B7', # B1ACAA\n", " label='GraphDB', edgecolor = \"black\", linewidth=0.5)\n", " plt.scatter(r2, centerticks, marker=\"|\", facecolor = \"gray\", linewidths=0.75)\n", "\n", "\n", " plt.xticks([r + barWidth * 1.5 for r in range(len(r1))], map(lambda each: each.strip(\"\"), query),\n", " fontsize=10)\n", " plt.yticks(np.arange(0, 6), ('0', '1', '2', '3', '4', 'TO'), fontsize=14)\n", " plt.ylim(top=5,bottom=-1.1)\n", " plt.ylabel(\"Time (log$_{10}$(s))\", fontsize=15)\n", " plt.title(scale, fontsize=20)\n", " plt.legend(mappingsEG_TS, loc='lower center', ncol=6, bbox_to_anchor=[0.5, -0.26], fontsize=13)\n", " plt.grid(axis='x')\n", "\n", " #plt.show()\n", " plt.savefig(\"./figures/time_queries_\" + scale.lower() + \".png\", bbox_inches='tight', dpi=700)\n", "\n", " plt.clf()\n", " \n", " print(\"Finished: 'figures/time_queries_\" + scale + \"'\")" ] }, { "cell_type": "code", "execution_count": null, "id": "4717afc0", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 5 }