{ "cells": [ { "cell_type": "markdown", "id": "liberal-publisher", "metadata": {}, "source": [ "# Comprehensive Constraints Analysis - With Removed Statements - Final\n", "\n", "In this notebook, the original dataset is combined with the removed statements dataset and then the violations are determined in total." ] }, { "cell_type": "code", "execution_count": 2, "id": "juvenile-ability", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8f09d8d199d445fbb9e4ed86e3bb148e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1149471184 [00:00(node2), \" + parentFile + \": (node1)-[]->(nodex), P279star: (nodex)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " --graph-cache ~/sqlite3_caches/type_new2_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv ;\\\n", " kgtk --debug query -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv \\\n", " ../../wikidata-20210215/derived.\" + parentFile + \".tsv.gz \\\n", " --match 'm: (node1)-[nodeProp]->(node2), \" + parentFile + \": (node1)-[]->(par)' \\\n", " --where 'par in \" + 
str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " --graph-cache ~/sqlite3_caches/type_new2_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv ; \\\n", " kgtk --debug cat -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv ; \\\n", " ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\")\n", "\n", " cnt += 1\n", " except:\n", " print(\"Something failed for prop:\",prop)\n", "\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": null, "id": "electrical-agreement", "metadata": {}, "outputs": [], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 72, "id": "outside-stupid", "metadata": {}, "outputs": [], "source": [ "# 
import os\n", "# for i in range(1,14):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_WRemoved_Final/checkViolations/typeConstraintValidator\"+str(i)+\".sh\")\n", " " ] }, { "cell_type": "markdown", "id": "competitive-canvas", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 21, "id": "casual-perth", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "87626194dcff46d9a805455ca60d475c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4d4a01eea5e946988b0acf5442b9ae20", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/835 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P13032031482610[../../allConstraintsAnalysis_WRemoved_Final/t...0.012685
P39193393158[../../allConstraintsAnalysis_WRemoved_Final/t...0.044495
P618526912[../../allConstraintsAnalysis_WRemoved_Final/t...0.042705
P3922182451[../../allConstraintsAnalysis_WRemoved_Final/t...0.027200
P30170594928017[../../allConstraintsAnalysis_WRemoved_Final/t...0.038172
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P1303 203148 2610 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P3919 3393 158 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P6185 269 12 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P3922 1824 51 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P301 705949 28017 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "\n", " violation_ratio \n", "P1303 0.012685 \n", "P3919 0.044495 \n", "P6185 0.042705 \n", "P3922 0.027200 \n", "P301 0.038172 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF.head()" ] }, { "cell_type": "code", "execution_count": 27, "id": "competitive-peeing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P81380462[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P5051081[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P2309055[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P2308051[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P2303044[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P1227020[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P2912016[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P6001016[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P8738014[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P538010[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P800406[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P558905[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P231205[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P651004[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
P231004[../../allConstraintsAnalysis_WRemoved_Final/t...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P8138 0 462 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P5051 0 81 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P2309 0 55 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P2308 0 51 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P2303 0 44 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P1227 0 20 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P2912 0 16 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P6001 0 16 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P8738 0 14 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P538 0 10 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P8004 0 6 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P5589 0 5 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P2312 0 5 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P6510 0 4 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P2310 0 4 [../../allConstraintsAnalysis_WRemoved_Final/t... 
\n", "\n", " violation_ratio \n", "P8138 1.0 \n", "P5051 1.0 \n", "P2309 1.0 \n", "P2308 1.0 \n", "P2303 1.0 \n", "P1227 1.0 \n", "P2912 1.0 \n", "P6001 1.0 \n", "P8738 1.0 \n", "P538 1.0 \n", "P8004 1.0 \n", "P5589 1.0 \n", "P2312 1.0 \n", "P6510 1.0 \n", "P2310 1.0 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF.sort_values(by=['violation_ratio','incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 30, "id": "clinical-lawsuit", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 1465.000000\n", "mean 0.117857\n", "std 0.226919\n", "min 0.000000\n", "25% 0.007233\n", "50% 0.024422\n", "75% 0.094675\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 31, "id": "wanted-domestic", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Type Constraint Violation Ratios')" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "typeConstDF['violation_ratio'].plot.hist(bins=100).set_title(\"Type Constraint Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 32, "id": "sufficient-hollywood", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Type Constraint Violation Ratios (<=0.05)')" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "typeConstDF[typeConstDF['violation_ratio'] <= 0.05].violation_ratio.plot.hist(bins=100).set_title(\"Type Constraint Violation Ratios (<=0.05)\")" ] }, { "cell_type": "code", "execution_count": 33, "id": "minor-marshall", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of constraints whose violation ratio is greater than mean :0/1465\n" ] } ], "source": [ "print(f\"No. of constraints whose violation ratio is greater than mean :{sum(typeConstDF['violation_ratio'] >= 5.286054)}/{len(typeConstDF)}\")" ] }, { "cell_type": "code", "execution_count": 36, "id": "revolutionary-violence", "metadata": {}, "outputs": [], "source": [ "for key1 in typeConstViolations.keys():\n", " typeConstViolations[key1]['correct'] = typeConstViolations[key1]['instanceOf']['correct'] + typeConstViolations[key1]['subclass']['correct'] + typeConstViolations[key1]['instanceOfOrSubclass']['correct']\n", " typeConstViolations[key1]['incorrect'] = typeConstViolations[key1]['instanceOf']['incorrect'] + typeConstViolations[key1]['subclass']['incorrect'] + typeConstViolations[key1]['instanceOfOrSubclass']['incorrect']\n", " typeConstViolations[key1]['VR'] = typeConstViolations[key1]['incorrect'] / (typeConstViolations[key1]['correct'] + typeConstViolations[key1]['incorrect'])\n", " " ] }, { "cell_type": "code", "execution_count": 37, "id": "emotional-favorite", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "{'mandatory': {'instanceOf': {'correct': 46306630, 'incorrect': 798106},\n", " 'subclass': {'correct': 2064, 'incorrect': 53},\n", " 'instanceOfOrSubclass': {'correct': 233195, 'incorrect': 3169},\n", " 'propCount': 167,\n", " 'correct': 46541889,\n", " 'incorrect': 801328,\n", " 'VR': 0.016925930487571218},\n", " 'suggestion': {'instanceOf': {'correct': 62170, 'incorrect': 19110},\n", " 'subclass': {'correct': 0, 'incorrect': 
0},\n", " 'instanceOfOrSubclass': {'correct': 24237, 'incorrect': 3458},\n", " 'propCount': 11,\n", " 'correct': 86407,\n", " 'incorrect': 22568,\n", " 'VR': 0.20709337003899977},\n", " 'normal': {'instanceOf': {'correct': 425791059, 'incorrect': 7991316},\n", " 'subclass': {'correct': 98826, 'incorrect': 13672},\n", " 'instanceOfOrSubclass': {'correct': 68383205, 'incorrect': 875475},\n", " 'propCount': 1287,\n", " 'correct': 494273090,\n", " 'incorrect': 8880463,\n", " 'VR': 0.017649608051162863}}" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstViolations" ] }, { "cell_type": "code", "execution_count": 38, "id": "aggregate-impact", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratiototal
P20931488432131512369[../../allConstraintsAnalysis_WRemoved_Final/t...0.010059150355582
P1476441008712199197[../../allConstraintsAnalysis_WRemoved_Final/t...0.04749946300068
P57739990807165864[../../allConstraintsAnalysis_WRemoved_Final/t...0.00413040156671
P143337028672112955[../../allConstraintsAnalysis_WRemoved_Final/t...0.00304137141627
P121533425605316565[../../allConstraintsAnalysis_WRemoved_Final/t...0.00938233742170
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2093 148843213 1512369 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P1476 44100871 2199197 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P577 39990807 165864 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P1433 37028672 112955 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "P1215 33425605 316565 [../../allConstraintsAnalysis_WRemoved_Final/t... \n", "\n", " violation_ratio total \n", "P2093 0.010059 150355582 \n", "P1476 0.047499 46300068 \n", "P577 0.004130 40156671 \n", "P1433 0.003041 37141627 \n", "P1215 0.009382 33742170 " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF['total'] = typeConstDF['correct'] + typeConstDF['incorrect']\n", "typeConstDF.sort_values(by=['total'],ascending=False).head()" ] }, { "cell_type": "code", "execution_count": 39, "id": "grateful-telling", "metadata": {}, "outputs": [], "source": [ "typeConstDF.to_csv('../../allConstraintsAnalysis_WRemoved_Final/typeConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "bearing-kruger", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": null, "id": "veterinary-fault", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "from tqdm.notebook import tqdm\n", "\n", "codepConstViolations = {}\n", "\n", "codepConstViolations = {}\n", "codepConstPropList = set()\n", "\n", "def extractTimes(filename):\n", " times = []\n", " with open(filename) as f:\n", " for line in f:\n", " if \"real\" in line:\n", " line = line.strip()\n", " time1 = line.split(\"\\t\")[1]\n", " mins, sec = time1.split(\"m\")\n", " mins = int(mins)\n", " sec = float(sec[:-1])\n", " times.append(60 * mins + sec)\n", " return times\n", "\n", "# codepConstViolationsSummary = {}\n", "times = []\n", "filePath = '/data/wd-correctness/propertiesSplit_WRemoved_Final/checkViolations/exec_logs/'\n", "for 
filename in tqdm(os.listdir(filePath)):\n", " if filename.startswith(\"typeConstraintValidator\"):\n", " tempTimes = extractTimes(filePath + filename)\n", " times += tempTimes" ] }, { "cell_type": "code", "execution_count": null, "id": "infinite-assembly", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(times).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "opened-essex", "metadata": {}, "outputs": [], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for type constraint checks\")" ] }, { "cell_type": "markdown", "id": "intense-computer", "metadata": {}, "source": [ "## Value Type Constraint\n", "\n", "Here, the constraint indicates that node2 must be an instance of or subclass of the specified class" ] }, { "cell_type": "markdown", "id": "animated-companion", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": null, "id": "static-profit", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "dfValueType = pd.read_csv('../../constraintsOP/valuetypeConstraint/claims.type-constraints_all1.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": null, "id": "worthy-malawi", "metadata": {}, "outputs": [], "source": [ "dfValueType = dfValueType.groupby(['node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": null, "id": "eleven-tiffany", "metadata": {}, "outputs": [], "source": [ "dfValueType.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "expired-stuff", "metadata": {}, "outputs": [], "source": [ "dfValueType['label'].unique()" ] }, { "cell_type": "markdown", "id": "digital-harvard", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 43, "id": "white-badge", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": 
"bb623e1d72164970a40f3bc2d9ab6346", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/932 [00:00(node2), \" + parentFile + \": (node2)-[]->(nodex), P279star: (nodex)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " --graph-cache ~/sqlite3_caches/valueType_new_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv ;\\\n", " kgtk --debug query -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv \\\n", " ../../wikidata-20210215/derived.\" + parentFile + \".tsv.gz \\\n", " --match 'm: (node1)-[nodeProp]->(node2), \" + parentFile + \": (node2)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " --graph-cache ~/sqlite3_caches/valueType_new_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_WRemoved_Final/\" + 
folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv ; \\\n", " kgtk --debug cat -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv ; \\\n", " ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\")\n", "\n", " cnt += 1\n", " except:\n", " print(\"Something failed for prop:\",prop)\n", "\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 3, "id": "capable-ballot", "metadata": {}, "outputs": [], "source": [ "!zgrep -P \"Q98970042\\t\" ../../wikidata-20210215/derived.P279star.tsv.gz" ] }, { "cell_type": "code", "execution_count": 44, "id": "qualified-cursor", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "897" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 45, "id": "simplified-cameroon", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,9):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_WRemoved_Final/checkViolations/valueTypeConstraintValidator_xverify3\"+str(i)+\".sh\")\n", " " ] }, { 
"cell_type": "markdown", "id": "spectacular-warner", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 41, "id": "valid-defense", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "98c5e9ac78ef42519b4391f36064d9ec", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0de0552b0a8340ad9113f80de62f4ee1", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/540 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P852452624[../../allConstraintsAnalysis_WRemoved_Final/v...0.005275
P236344040[../../allConstraintsAnalysis_WRemoved_Final/v...0.000000
P73271876[../../allConstraintsAnalysis_WRemoved_Final/v...0.031088
P85316008[../../allConstraintsAnalysis_WRemoved_Final/v...0.004975
P23024791835[../../allConstraintsAnalysis_WRemoved_Final/v...0.000730
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P852 4526 24 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P2363 4404 0 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P7327 187 6 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P853 1600 8 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P2302 47918 35 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "\n", " violation_ratio \n", "P852 0.005275 \n", "P2363 0.000000 \n", "P7327 0.031088 \n", "P853 0.004975 \n", "P2302 0.000730 " ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF.head()" ] }, { "cell_type": "code", "execution_count": 47, "id": "neural-trail", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P50080341961[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P610409808[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P7374044[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P3028015[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P2839015[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P3027013[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P538010[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P224106[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P442506[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P619105[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P653305[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P653405[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P66004[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P717403[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
P862702[../../allConstraintsAnalysis_WRemoved_Final/v...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5008 0 341961 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P6104 0 9808 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P7374 0 44 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P3028 0 15 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P2839 0 15 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P3027 0 13 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P538 0 10 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P2241 0 6 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P4425 0 6 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P6191 0 5 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P6533 0 5 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P6534 0 5 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P660 0 4 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P7174 0 3 [../../allConstraintsAnalysis_WRemoved_Final/v... \n", "P8627 0 2 [../../allConstraintsAnalysis_WRemoved_Final/v... 
\n", "\n", " violation_ratio \n", "P5008 1.0 \n", "P6104 1.0 \n", "P7374 1.0 \n", "P3028 1.0 \n", "P2839 1.0 \n", "P3027 1.0 \n", "P538 1.0 \n", "P2241 1.0 \n", "P4425 1.0 \n", "P6191 1.0 \n", "P6533 1.0 \n", "P6534 1.0 \n", "P660 1.0 \n", "P7174 1.0 \n", "P8627 1.0 " ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF.sort_values(by=['violation_ratio','incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 49, "id": "cutting-polyester", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 904.000000\n", "mean 0.112995\n", "std 0.211926\n", "min 0.000000\n", "25% 0.006362\n", "50% 0.024834\n", "75% 0.099675\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 50, "id": "alert-receiver", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Value Type Constraint Violation Ratios')" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "valTypeConstDF['violation_ratio'].plot.hist(bins=100).set_title(\"Value Type Constraint Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 51, "id": "italian-motel", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Value Type Constraint Violation Ratios (<=0.04)')" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "valTypeConstDF[valTypeConstDF['violation_ratio'] <= 0.04].violation_ratio.plot.hist(bins=100).set_title(\"Value Type Constraint Violation Ratios (<=0.04)\")" ] }, { "cell_type": "code", "execution_count": 52, "id": "prescription-ceramic", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of constraints whose violation ratio is greater than mean :0/904\n" ] } ], "source": [ "print(f\"No. of constraints whose violation ratio is greater than mean :{sum(valTypeConstDF['violation_ratio'] >= 3.950680)}/{len(valTypeConstDF)}\")" ] }, { "cell_type": "code", "execution_count": 55, "id": "tutorial-mineral", "metadata": {}, "outputs": [], "source": [ "for key1 in valueTypeConstViolations.keys():\n", " valueTypeConstViolations[key1]['correct'] = valueTypeConstViolations[key1]['instanceOf']['correct'] + valueTypeConstViolations[key1]['subclass']['correct'] + valueTypeConstViolations[key1]['instanceOfOrSubclass']['correct']\n", " valueTypeConstViolations[key1]['incorrect'] = valueTypeConstViolations[key1]['instanceOf']['incorrect'] + valueTypeConstViolations[key1]['subclass']['incorrect'] + valueTypeConstViolations[key1]['instanceOfOrSubclass']['incorrect']\n", " valueTypeConstViolations[key1]['VR'] = valueTypeConstViolations[key1]['incorrect'] / (valueTypeConstViolations[key1]['correct'] + valueTypeConstViolations[key1]['incorrect'])\n", " " ] }, { "cell_type": "code", "execution_count": 56, "id": "satellite-concern", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "{'mandatory': {'instanceOf': {'correct': 11564885, 'incorrect': 30391},\n", " 'subclass': {'correct': 55983, 'incorrect': 83},\n", " 'instanceOfOrSubclass': {'correct': 13090, 'incorrect': 320},\n", " 'propCount': 108,\n", " 'correct': 11633958,\n", " 'incorrect': 30794,\n", " 'VR': 0.0026399189627006217},\n", " 'suggestion': {'instanceOf': {'correct': 
def extractTimes(filename):
    """Collect the wall-clock durations recorded in one execution log.

    The log is scanned for lines of the form ``real<TAB><minutes>m<seconds>s``
    and each match is converted to seconds.

    Only lines that match this pattern exactly (after stripping surrounding
    whitespace) are used. Any other line that merely contains the substring
    "real" is ignored instead of raising IndexError/ValueError while being
    split and parsed.

    Parameters
    ----------
    filename : str
        Path of the log file to read.

    Returns
    -------
    list of float
        One duration in seconds per matching line, in file order. Empty when
        the file contains no matching line.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    import re  # local import keeps this function self-contained

    realTimePattern = re.compile(r"^real\t(\d+)m(\d+(?:\.\d+)?)s$")

    times = []
    # errors="replace": undecodable bytes elsewhere in the log must not abort
    # the scan; the lines of interest are plain ASCII.
    with open(filename, encoding="utf-8", errors="replace") as f:
        for line in f:
            match = realTimePattern.match(line.strip())
            if match is None:
                continue
            mins = int(match.group(1))
            sec = float(match.group(2))
            times.append(60 * mins + sec)
    return times
" times += tempTimes" ] }, { "cell_type": "code", "execution_count": null, "id": "heavy-argentina", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(times).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "organized-mixer", "metadata": {}, "outputs": [], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for value type constraint checks\")" ] }, { "cell_type": "markdown", "id": "motivated-sympathy", "metadata": {}, "source": [ "## Item Requires Statement Constraint\n", "\n", "Here, the constraint mentions the other properties that node1 must have and the values that this property must have in few cases" ] }, { "cell_type": "markdown", "id": "chubby-glass", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 58, "id": "funny-batch", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/itemRequiresConstraint/claims.type-constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 59, "id": "original-expression", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 60, "id": "adequate-symphony", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2306', 'P2305', 'P2316', 'P2304', 'P2303', 'P6607', 'P4155',\n", " 'P31', 'P2916', 'P4680', 'P2308'], dtype=object)" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 61, "id": "infrared-canal", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2306 7182\n", "P2305 2540\n", "P2316 2523\n", "P2303 422\n", "P2304 14\n", "P6607 14\n", "P2916 5\n", "P4680 2\n", "P4155 1\n", 
"P31 1\n", "P2308 1\n", "Name: label, dtype: int64" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 62, "id": "focused-karen", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 63, "id": "private-boundary", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1id
P1006P1006-P2302-Q21503247-0451ef47-0NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
P1010P1010-P2302-Q21503247-56183614-0NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1010-P2302-Q21503247-fd256eaf-0NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1015P1015-P2302-Q21503247-20e3bfc5-0NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1017P1017-P2302-Q21503247-bbac2ce3-0NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 \\\n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN NaN NaN [P214] NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN NaN NaN [P31] NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 NaN NaN [Q794] [P17] NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN NaN NaN [P31] NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN NaN NaN [P214] NaN \n", "\n", "label P2316 P2916 P31 P4155 P4680 \\\n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN NaN NaN NaN NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN NaN NaN NaN NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 [Q21502408] NaN NaN NaN NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN NaN NaN NaN NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN NaN NaN NaN NaN \n", "\n", "label P6607 \n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN " ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 64, "id": "conceptual-schedule", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 65, "id": "third-hayes", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P1006NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
P1010NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1010NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1015NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1017NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 P2316 P2916 P31 P4155 P4680 \\\n", "node1 \n", "P1006 NaN NaN NaN [P214] NaN NaN NaN NaN NaN NaN \n", "P1010 NaN NaN NaN [P31] NaN NaN NaN NaN NaN NaN \n", "P1010 NaN NaN [Q794] [P17] NaN [Q21502408] NaN NaN NaN NaN \n", "P1015 NaN NaN NaN [P31] NaN NaN NaN NaN NaN NaN \n", "P1017 NaN NaN NaN [P214] NaN NaN NaN NaN NaN NaN \n", "\n", "label P6607 \n", "node1 \n", "P1006 NaN \n", "P1010 NaN \n", "P1010 NaN \n", "P1015 NaN \n", "P1017 NaN " ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "shaped-companion", "metadata": {}, "source": [ "However, there is one anomaly where the property does not have a co-dependency constraint associated with it, but still has a link to this constraint." ] }, { "cell_type": "code", "execution_count": 66, "id": "indian-journal", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P5447NaNNaN[Q55426051][P5446]NaNNaNNaNNaNNaN[Q46466783]NaN
P5448NaNNaN[Q55426051][P5446]NaNNaNNaNNaNNaN[Q46466783]NaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 P2316 P2916 P31 P4155 \\\n", "node1 \n", "P5447 NaN NaN [Q55426051] [P5446] NaN NaN NaN NaN NaN \n", "P5448 NaN NaN [Q55426051] [P5446] NaN NaN NaN NaN NaN \n", "\n", "label P4680 P6607 \n", "node1 \n", "P5447 [Q46466783] NaN \n", "P5448 [Q46466783] NaN " ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires[dfItemRequires['P4680'].apply(lambda p: type(p) == list)]" ] }, { "cell_type": "code", "execution_count": 67, "id": "discrete-template", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P1010NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1045NaNNaN[Q20808382, Q28218485, Q3044918][P39]NaNNaNNaNNaNNaNNaNNaN
P1045NaNNaN[Q82955][P106]NaNNaNNaNNaNNaNNaNNaN
P1045NaNNaN[Q5][P31]NaN[Q21502408]NaNNaNNaNNaNNaN
P1045NaNNaN[Q142, Q71084][P27]NaNNaNNaNNaNNaNNaNNaN
....................................
P980NaNNaN[Q34][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P981NaNNaN[Q55][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P981NaNNaN[Q1852859][P31]NaNNaNNaNNaNNaNNaNNaN
P988NaNNaN[Q928][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P990[Q49678, Q853715]NaN[Q5][P31]NaNNaNNaNNaNNaNNaNNaN
\n", "

2540 rows × 11 columns

\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 \\\n", "node1 \n", "P1010 NaN NaN [Q794] [P17] \n", "P1045 NaN NaN [Q20808382, Q28218485, Q3044918] [P39] \n", "P1045 NaN NaN [Q82955] [P106] \n", "P1045 NaN NaN [Q5] [P31] \n", "P1045 NaN NaN [Q142, Q71084] [P27] \n", "... ... ... ... ... \n", "P980 NaN NaN [Q34] [P17] \n", "P981 NaN NaN [Q55] [P17] \n", "P981 NaN NaN [Q1852859] [P31] \n", "P988 NaN NaN [Q928] [P17] \n", "P990 [Q49678, Q853715] NaN [Q5] [P31] \n", "\n", "label P2308 P2316 P2916 P31 P4155 P4680 P6607 \n", "node1 \n", "P1010 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "P1045 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "... ... ... ... ... ... ... ... \n", "P980 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P981 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P981 NaN NaN NaN NaN NaN NaN NaN \n", "P988 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P990 NaN NaN NaN NaN NaN NaN NaN \n", "\n", "[2540 rows x 11 columns]" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires[dfItemRequires['P2305'].apply(lambda p: type(p) == list)]" ] }, { "cell_type": "markdown", "id": "forced-christmas", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "markdown", "id": "acquired-floor", "metadata": {}, "source": [ "#### Version 1 - Mandatory + Suggestion + Normal" ] }, { "cell_type": "code", "execution_count": 11, "id": "turkish-establishment", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory + suggestion + normal\n", " \n", " if len(constSet) 
== 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit_WRemoved_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit_WRemoved_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug 
ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) 
+ str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 12, "id": "peripheral-herald", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "534" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 13, "id": "incorporated-logistics", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fCnt" ] }, { "cell_type": "code", "execution_count": 54, "id": "optimum-blowing", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,28):\n", "# os.system(\"screen -dm sh 
../../propertiesSplit_WRemoved_Final/checkViolations/codepConst_MSN_Validator\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "indoor-verse", "metadata": {}, "source": [ "#### Version 2 - Mandatory + Normal" ] }, { "cell_type": "code", "execution_count": 15, "id": "furnished-paradise", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory + normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit_WRemoved_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + 
commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit_WRemoved_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile 
= \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed 
for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 16, "id": "searching-individual", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "475" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 55, "id": "silver-clarity", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,25):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_WRemoved_Final/checkViolations/codepConst_MN_Validator\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "prescription-access", "metadata": {}, "source": [ "#### Version 3 - Mandatory" ] }, { "cell_type": "code", "execution_count": 17, "id": "married-porter", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit_WRemoved_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += 
[f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit_WRemoved_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + 
subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_WRemoved_Final/\" 
+ folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 18, "id": "according-blackberry", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "79" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 56, "id": "extraordinary-drawing", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,5):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_WRemoved_Final/checkViolations/codepConst_M_Validator\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "subsequent-brown", "metadata": {}, "source": [ "#### Version 4 - Normal" ] }, { "cell_type": "code", "execution_count": 21, "id": "operational-migration", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: 
../../propertiesSplit_WRemoved_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit_WRemoved_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + 
subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + 
folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv\\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 22, "id": "harmful-binary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "424" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 57, "id": "advance-married", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,23):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_WRemoved_Final/checkViolations/codepConst_N_Validator\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "ranging-journal", "metadata": {}, "source": [ "#### Version 5 - Suggestion" ] }, { "cell_type": "code", "execution_count": 23, "id": "missing-jordan", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = suggestion\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit_WRemoved_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit_WRemoved_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop 
+\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + 
subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 24, "id": "soviet-forth", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "97" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 58, "id": "racial-stationery", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,6):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_WRemoved_Final/checkViolations/codepConst_S_Validator\"+str(i)+\".sh\")" ] }, { "cell_type": 
"markdown", "id": "homeless-pleasure", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 68, "id": "welcome-dependence", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e25b0c210ce141f3bf78f3aa51ff00d9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7baebc8398064eb59bcf717d236bc684", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1206 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P101818616[../../allConstraintsAnalysis_WRemoved_Final/c...
P14561864837[../../allConstraintsAnalysis_WRemoved_Final/c...
P19278222228453[../../allConstraintsAnalysis_WRemoved_Final/c...
P231205[../../allConstraintsAnalysis_WRemoved_Final/c...
P2860174842113152425[../../allConstraintsAnalysis_WRemoved_Final/c...
............
P409114871[../../allConstraintsAnalysis_WRemoved_Final/c...
P410138058813[../../allConstraintsAnalysis_WRemoved_Final/c...
P41163046866[../../allConstraintsAnalysis_WRemoved_Final/c...
P4122046513806[../../allConstraintsAnalysis_WRemoved_Final/c...
P41338914535536[../../allConstraintsAnalysis_WRemoved_Final/c...
\n", "

534 rows × 3 columns

\n", "" ], "text/plain": [ " correct incorrect paths\n", "P1018 186 16 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P1456 18648 37 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P19 2782222 28453 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P2312 0 5 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P2860 174842113 152425 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "... ... ... ...\n", "P4091 1487 1 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P410 138058 813 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P411 6304 6866 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P412 20465 13806 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P413 389145 35536 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "\n", "[534 rows x 3 columns]" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1" ] }, { "cell_type": "code", "execution_count": 76, "id": "powered-residence", "metadata": {}, "outputs": [], "source": [ "# violation_ratio = incorrect / (correct + incorrect), computed column-wise.\n", "# Float division yields NaN (instead of raising ZeroDivisionError) for a property with no statements at all.\n", "codepConstDF1['violation_ratio'] = codepConstDF1['incorrect'].astype(float) / (codepConstDF1['correct'] + codepConstDF1['incorrect']).astype(float)" ] }, { "cell_type": "code", "execution_count": 77, "id": "chinese-pressing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2302048022[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P306301252[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P2309055[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P2308051[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P5447024[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P5448023[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P756909[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P790307[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P231205[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P231304[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P231004[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P231104[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P468002[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P230702[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P291602[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2302 0 48022 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P3063 0 1252 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2309 0 55 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2308 0 51 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P5447 0 24 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P5448 0 23 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P7569 0 9 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P7903 0 7 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2312 0 5 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2313 0 4 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2310 0 4 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2311 0 4 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P4680 0 2 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2307 0 2 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2916 0 2 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "\n", " violation_ratio \n", "P2302 1.0 \n", "P3063 1.0 \n", "P2309 1.0 \n", "P2308 1.0 \n", "P5447 1.0 \n", "P5448 1.0 \n", "P7569 1.0 \n", "P7903 1.0 \n", "P2312 1.0 \n", "P2313 1.0 \n", "P2310 1.0 \n", "P2311 1.0 \n", "P4680 1.0 \n", "P2307 1.0 \n", "P2916 1.0 " ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1.sort_values(by=['violation_ratio', 'incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 80, "id": "demonstrated-debut", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P7342119055738534[../../allConstraintsAnalysis_WRemoved_Final/c...0.258447
P22142989348721476[../../allConstraintsAnalysis_WRemoved_Final/c...0.194425
P43331113458436995[../../allConstraintsAnalysis_WRemoved_Final/c...0.013851
P1951150231401686[../../allConstraintsAnalysis_WRemoved_Final/c...0.258832
P5696926852267556[../../allConstraintsAnalysis_WRemoved_Final/c...0.037189
P13111274319200519[../../allConstraintsAnalysis_WRemoved_Final/c...0.017475
P2757295163856[../../allConstraintsAnalysis_WRemoved_Final/c...0.957377
P2860174842113152425[../../allConstraintsAnalysis_WRemoved_Final/c...0.000871
P5703471930105652[../../allConstraintsAnalysis_WRemoved_Final/c...0.029532
P201786770101977[../../allConstraintsAnalysis_WRemoved_Final/c...0.540284
P1435212163792401[../../allConstraintsAnalysis_WRemoved_Final/c...0.041734
P19223063664256[../../allConstraintsAnalysis_WRemoved_Final/c...0.677149
P47653795063425[../../allConstraintsAnalysis_WRemoved_Final/c...0.625647
P7084571857276[../../allConstraintsAnalysis_WRemoved_Final/c...0.556110
P19712201852917[../../allConstraintsAnalysis_WRemoved_Final/c...0.302495
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P734 2119055 738534 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2214 2989348 721476 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P433 31113458 436995 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P195 1150231 401686 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P569 6926852 267556 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P131 11274319 200519 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P275 7295 163856 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2860 174842113 152425 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P570 3471930 105652 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2017 86770 101977 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1435 2121637 92401 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1922 30636 64256 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P4765 37950 63425 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P708 45718 57276 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P197 122018 52917 [../../allConstraintsAnalysis_WRemoved_Final/c... 
\n", "\n", " violation_ratio \n", "P734 0.258447 \n", "P2214 0.194425 \n", "P433 0.013851 \n", "P195 0.258832 \n", "P569 0.037189 \n", "P131 0.017475 \n", "P275 0.957377 \n", "P2860 0.000871 \n", "P570 0.029532 \n", "P2017 0.540284 \n", "P1435 0.041734 \n", "P1922 0.677149 \n", "P4765 0.625647 \n", "P708 0.556110 \n", "P197 0.302495 " ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 81, "id": "developed-zimbabwe", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 534.000000\n", "mean 0.212037\n", "std 0.306183\n", "min 0.000000\n", "25% 0.001654\n", "50% 0.040424\n", "75% 0.329546\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 82, "id": "unknown-johnston", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 1 - Violation Ratios')" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF1['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 1 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 83, "id": "exceptional-dakota", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 1 - Violation Ratios <= 0.5')" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF1[codepConstDF1['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 1 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": null, "id": "interior-joseph", "metadata": {}, "outputs": [], "source": [ "# Compare each ratio against the column's actual mean; the previous hard-coded threshold (3.539484)\n", "# lies outside the [0, 1] range of a ratio, so the count was always 0.\n", "codepMeanRatio1 = codepConstDF1['violation_ratio'].mean()\n", "print(f\"No. of properties whose violation ratio is greater than mean: {(codepConstDF1['violation_ratio'] > codepMeanRatio1).sum()}/{len(codepConstDF1)}\")" ] }, { "cell_type": "code", "execution_count": 85, "id": "english-difference", "metadata": {}, "outputs": [], "source": [ "codepConstDF1.to_csv('../../allConstraintsAnalysis_WRemoved_Final/codepConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "greater-genetics", "metadata": {}, "source": [ "#### Version 2 - Mand Normal" ] }, { "cell_type": "code", "execution_count": 86, "id": "constant-chance", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF2 = pd.DataFrame(codepConstViolations['Mand_Normal']).T" ] }, { "cell_type": "code", "execution_count": 87, "id": "included-adjustment", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P101818616[../../allConstraintsAnalysis_WRemoved_Final/c...
P153383289693[../../allConstraintsAnalysis_WRemoved_Final/c...
P2009103617[../../allConstraintsAnalysis_WRemoved_Final/c...
P240258019[../../allConstraintsAnalysis_WRemoved_Final/c...
P38163110[../../allConstraintsAnalysis_WRemoved_Final/c...
............
P364870[../../allConstraintsAnalysis_WRemoved_Final/c...
P3701721[../../allConstraintsAnalysis_WRemoved_Final/c...
P37133591[../../allConstraintsAnalysis_WRemoved_Final/c...
P37443771[../../allConstraintsAnalysis_WRemoved_Final/c...
P38156001[../../allConstraintsAnalysis_WRemoved_Final/c...
\n", "

475 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P1018 186 16 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P1533 8328 9693 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P2009 1036 17 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P240 25801 9 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P3816 31 10 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "... ... ... ...\n", "P3648 7 0 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P3701 72 1 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P3713 359 1 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P3744 377 1 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P3815 600 1 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "\n", "[475 rows x 3 columns]" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2" ] }, { "cell_type": "code", "execution_count": 88, "id": "fundamental-knowing", "metadata": {}, "outputs": [], "source": [ "# violation_ratio = incorrect / (correct + incorrect), computed column-wise.\n", "# Float division yields NaN (instead of raising ZeroDivisionError) for a property with no statements at all.\n", "codepConstDF2['violation_ratio'] = codepConstDF2['incorrect'].astype(float) / (codepConstDF2['correct'] + codepConstDF2['incorrect']).astype(float)" ] }, { "cell_type": "code", "execution_count": 89, "id": "harmful-discipline", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2308051[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P291602[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P5448023[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P5447024[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P230401[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P230702[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P2309055[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P468002[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P231004[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P284201[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P231104[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P231205[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P231304[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P826401[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
P756909[../../allConstraintsAnalysis_WRemoved_Final/c...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2308 0 51 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2916 0 2 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P5448 0 23 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P5447 0 24 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2304 0 1 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2307 0 2 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2309 0 55 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P4680 0 2 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2310 0 4 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2842 0 1 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2311 0 4 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2312 0 5 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2313 0 4 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P8264 0 1 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P7569 0 9 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "\n", " violation_ratio \n", "P2308 1.0 \n", "P2916 1.0 \n", "P5448 1.0 \n", "P5447 1.0 \n", "P2304 1.0 \n", "P2307 1.0 \n", "P2309 1.0 \n", "P4680 1.0 \n", "P2310 1.0 \n", "P2842 1.0 \n", "P2311 1.0 \n", "P2312 1.0 \n", "P2313 1.0 \n", "P8264 1.0 \n", "P7569 1.0 " ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 90, "id": "unlikely-chamber", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P22142989348721476[../../allConstraintsAnalysis_WRemoved_Final/c...0.194425
P43331113458436995[../../allConstraintsAnalysis_WRemoved_Final/c...0.013851
P2757295163856[../../allConstraintsAnalysis_WRemoved_Final/c...0.957377
P2860174842113152425[../../allConstraintsAnalysis_WRemoved_Final/c...0.000871
P1435212163792401[../../allConstraintsAnalysis_WRemoved_Final/c...0.041734
P7084571857276[../../allConstraintsAnalysis_WRemoved_Final/c...0.556110
P19712201852917[../../allConstraintsAnalysis_WRemoved_Final/c...0.302495
P15984060049329[../../allConstraintsAnalysis_WRemoved_Final/c...0.548533
P1111146803[../../allConstraintsAnalysis_WRemoved_Final/c...0.999979
P856145194944006[../../allConstraintsAnalysis_WRemoved_Final/c...0.029417
P2248402541608[../../allConstraintsAnalysis_WRemoved_Final/c...0.911796
P2325407940619[../../allConstraintsAnalysis_WRemoved_Final/c...0.908743
P2243403536870[../../allConstraintsAnalysis_WRemoved_Final/c...0.901357
P2244403636801[../../allConstraintsAnalysis_WRemoved_Final/c...0.901168
P41338914535536[../../allConstraintsAnalysis_WRemoved_Final/c...0.083677
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2214 2989348 721476 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P433 31113458 436995 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P275 7295 163856 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2860 174842113 152425 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1435 2121637 92401 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P708 45718 57276 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P197 122018 52917 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1598 40600 49329 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1111 1 46803 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P856 1451949 44006 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2248 4025 41608 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2325 4079 40619 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2243 4035 36870 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2244 4036 36801 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P413 389145 35536 [../../allConstraintsAnalysis_WRemoved_Final/c... 
\n", "\n", " violation_ratio \n", "P2214 0.194425 \n", "P433 0.013851 \n", "P275 0.957377 \n", "P2860 0.000871 \n", "P1435 0.041734 \n", "P708 0.556110 \n", "P197 0.302495 \n", "P1598 0.548533 \n", "P1111 0.999979 \n", "P856 0.029417 \n", "P2248 0.911796 \n", "P2325 0.908743 \n", "P2243 0.901357 \n", "P2244 0.901168 \n", "P413 0.083677 " ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 91, "id": "violent-match", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 475.000000\n", "mean 0.177493\n", "std 0.289214\n", "min 0.000000\n", "25% 0.000966\n", "50% 0.023847\n", "75% 0.214528\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 92, "id": "educational-thickness", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 2 - Violation Ratios')" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF2['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 2 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 93, "id": "latin-mitchell", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 2 - Violation Ratios <= 0.5')" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF2[codepConstDF2['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 2 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": null, "id": "asian-forwarding", "metadata": {}, "outputs": [], "source": [ "# Threshold is the mean of the column itself; the previous literal (2.290915) no longer matched the data\n", "# (describe() reports a mean of about 0.18 and a max of 1.0), so the count was always 0.\n", "ratioMean2 = codepConstDF2['violation_ratio'].mean()\n", "print(f\"No. of properties whose violation ratio is greater than mean: {(codepConstDF2['violation_ratio'] > ratioMean2).sum()}/{len(codepConstDF2)}\")" ] }, { "cell_type": "markdown", "id": "destroyed-flash", "metadata": {}, "source": [ "#### Version 3 - Mand" ] }, { "cell_type": "code", "execution_count": 95, "id": "consecutive-plenty", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF3 = pd.DataFrame(codepConstViolations['Mand']).T" ] }, { "cell_type": "code", "execution_count": 96, "id": "digital-mileage", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P564188875[../../allConstraintsAnalysis_WRemoved_Final/c...
P1081126930[../../allConstraintsAnalysis_WRemoved_Final/c...
P598213280[../../allConstraintsAnalysis_WRemoved_Final/c...
P108737746351210[../../allConstraintsAnalysis_WRemoved_Final/c...
P613706085[../../allConstraintsAnalysis_WRemoved_Final/c...
............
P510520600[../../allConstraintsAnalysis_WRemoved_Final/c...
P5172480[../../allConstraintsAnalysis_WRemoved_Final/c...
P52117570[../../allConstraintsAnalysis_WRemoved_Final/c...
P555173952[../../allConstraintsAnalysis_WRemoved_Final/c...
P562314740[../../allConstraintsAnalysis_WRemoved_Final/c...
\n", "

79 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P564 18887 5 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P1081 12693 0 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P5982 1328 0 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P1087 3774635 1210 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P613 70608 5 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "... ... ... ...\n", "P5105 2060 0 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P517 248 0 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P521 1757 0 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P555 17395 2 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P5623 1474 0 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "\n", "[79 rows x 3 columns]" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3" ] }, { "cell_type": "code", "execution_count": null, "id": "formed-battle", "metadata": {}, "outputs": [], "source": [ "# Violation ratio = share of all checked statements that violate the constraint, bounded to [0, 1].\n", "# This is the same definition the Version 2 table uses; incorrect/correct is unbounded and needed an arbitrary fallback.\n", "# A property with no checked statements at all gets 0.0.\n", "codepConstDF3['violation_ratio'] = codepConstDF3.apply(lambda p: p.incorrect / (p.correct + p.incorrect) if (p.correct + p.incorrect) != 0 else 0.0, axis=1)" ] }, { "cell_type": "code", "execution_count": 98, "id": "numerous-construction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P5051279[../../allConstraintsAnalysis_WRemoved_Final/c...39.500000
P434181[../../allConstraintsAnalysis_WRemoved_Final/c...0.125000
P2095383[../../allConstraintsAnalysis_WRemoved_Final/c...0.078947
P990108944[../../allConstraintsAnalysis_WRemoved_Final/c...0.040404
P3931231668[../../allConstraintsAnalysis_WRemoved_Final/c...0.029361
P2461745[../../allConstraintsAnalysis_WRemoved_Final/c...0.028736
P1713293579[../../allConstraintsAnalysis_WRemoved_Final/c...0.026917
P1411312323270[../../allConstraintsAnalysis_WRemoved_Final/c...0.024918
P17313828[../../allConstraintsAnalysis_WRemoved_Final/c...0.020942
P2009103617[../../allConstraintsAnalysis_WRemoved_Final/c...0.016409
P826401[../../allConstraintsAnalysis_WRemoved_Final/c...0.010000
P284201[../../allConstraintsAnalysis_WRemoved_Final/c...0.010000
P1560345933[../../allConstraintsAnalysis_WRemoved_Final/c...0.009540
P16359228[../../allConstraintsAnalysis_WRemoved_Final/c...0.008677
P9448517[../../allConstraintsAnalysis_WRemoved_Final/c...0.008226
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5051 2 79 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P4341 8 1 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2095 38 3 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P990 1089 44 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P3931 2316 68 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P246 174 5 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1713 2935 79 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P141 131232 3270 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1731 382 8 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2009 1036 17 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P8264 0 1 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2842 0 1 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1560 3459 33 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1635 922 8 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P944 851 7 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "\n", " violation_ratio \n", "P5051 39.500000 \n", "P4341 0.125000 \n", "P2095 0.078947 \n", "P990 0.040404 \n", "P3931 0.029361 \n", "P246 0.028736 \n", "P1713 0.026917 \n", "P141 0.024918 \n", "P1731 0.020942 \n", "P2009 0.016409 \n", "P8264 0.010000 \n", "P2842 0.010000 \n", "P1560 0.009540 \n", "P1635 0.008677 \n", "P944 0.008226 " ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 102, "id": "imposed-bibliography", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P1411312323270[../../allConstraintsAnalysis_WRemoved_Final/c...0.024918
P108737746351210[../../allConstraintsAnalysis_WRemoved_Final/c...0.000321
P7959657006322[../../allConstraintsAnalysis_WRemoved_Final/c...0.000490
P5051279[../../allConstraintsAnalysis_WRemoved_Final/c...39.500000
P1713293579[../../allConstraintsAnalysis_WRemoved_Final/c...0.026917
P3931231668[../../allConstraintsAnalysis_WRemoved_Final/c...0.029361
P4511216750[../../allConstraintsAnalysis_WRemoved_Final/c...0.004109
P9152896647[../../allConstraintsAnalysis_WRemoved_Final/c...0.001623
P19717488844[../../allConstraintsAnalysis_WRemoved_Final/c...0.000252
P990108944[../../allConstraintsAnalysis_WRemoved_Final/c...0.040404
P1560345933[../../allConstraintsAnalysis_WRemoved_Final/c...0.009540
P29622922025[../../allConstraintsAnalysis_WRemoved_Final/c...0.000856
P2009103617[../../allConstraintsAnalysis_WRemoved_Final/c...0.016409
P2364970517[../../allConstraintsAnalysis_WRemoved_Final/c...0.001752
P2365185110[../../allConstraintsAnalysis_WRemoved_Final/c...0.005402
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P141 131232 3270 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1087 3774635 1210 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P7959 657006 322 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P5051 2 79 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1713 2935 79 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P3931 2316 68 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P451 12167 50 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P915 28966 47 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P197 174888 44 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P990 1089 44 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1560 3459 33 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2962 29220 25 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2009 1036 17 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2364 9705 17 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2365 1851 10 [../../allConstraintsAnalysis_WRemoved_Final/c... 
\n", "\n", " violation_ratio \n", "P141 0.024918 \n", "P1087 0.000321 \n", "P7959 0.000490 \n", "P5051 39.500000 \n", "P1713 0.026917 \n", "P3931 0.029361 \n", "P451 0.004109 \n", "P915 0.001623 \n", "P197 0.000252 \n", "P990 0.040404 \n", "P1560 0.009540 \n", "P2962 0.000856 \n", "P2009 0.016409 \n", "P2364 0.001752 \n", "P2365 0.005402 " ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 103, "id": "emotional-crown", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 79.000000\n", "mean 0.506010\n", "std 4.443448\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.000260\n", "75% 0.002110\n", "max 39.500000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 104, "id": "certain-freeze", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 3 - Violation Ratios')" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF3['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 3 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 105, "id": "cooperative-ownership", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 3 - Violation Ratios <= 0.0005')" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF3[codepConstDF3['violation_ratio'] <= 0.0005].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 3 - Violation Ratios <= 0.0005\")" ] }, { "cell_type": "code", "execution_count": 106, "id": "studied-inclusion", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of properties whose violation ratio is greater than mean: 1/79\n" ] } ], "source": [ "# Threshold is the mean of the column itself; the previous literal (0.922928) did not match the mean reported by describe().\n", "ratioMean3 = codepConstDF3['violation_ratio'].mean()\n", "print(f\"No. of properties whose violation ratio is greater than mean: {(codepConstDF3['violation_ratio'] > ratioMean3).sum()}/{len(codepConstDF3)}\")" ] }, { "cell_type": "markdown", "id": "protective-brazil", "metadata": {}, "source": [ "#### Version 4 - Normal" ] }, { "cell_type": "code", "execution_count": 107, "id": "laughing-pressing", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF4 = pd.DataFrame(codepConstViolations['Normal']).T" ] }, { "cell_type": "code", "execution_count": 108, "id": "loving-swift", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P101818616[../../allConstraintsAnalysis_WRemoved_Final/c...
P1539243577551[../../allConstraintsAnalysis_WRemoved_Final/c...
P2091631[../../allConstraintsAnalysis_WRemoved_Final/c...
P574552492[../../allConstraintsAnalysis_WRemoved_Final/c...
P746941325[../../allConstraintsAnalysis_WRemoved_Final/c...
............
P410138058813[../../allConstraintsAnalysis_WRemoved_Final/c...
P41176295541[../../allConstraintsAnalysis_WRemoved_Final/c...
P41338914535536[../../allConstraintsAnalysis_WRemoved_Final/c...
P4189611[../../allConstraintsAnalysis_WRemoved_Final/c...
P42134265514[../../allConstraintsAnalysis_WRemoved_Final/c...
\n", "

424 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P1018 186 16 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P1539 243577 551 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P209 163 1 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P574 5524 92 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P746 941 325 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "... ... ... ...\n", "P410 138058 813 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P411 7629 5541 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P413 389145 35536 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P418 96 11 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "P4213 42655 14 [../../allConstraintsAnalysis_WRemoved_Final/c...\n", "\n", "[424 rows x 3 columns]" ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4" ] }, { "cell_type": "code", "execution_count": null, "id": "north-christian", "metadata": {}, "outputs": [], "source": [ "# Violation ratio = share of all checked statements that violate the constraint, bounded to [0, 1].\n", "# This is the same definition the Version 2 table uses; incorrect/correct is unbounded and needed an arbitrary fallback.\n", "# A property with no checked statements at all gets 0.0.\n", "codepConstDF4['violation_ratio'] = codepConstDF4.apply(lambda p: p.incorrect / (p.correct + p.incorrect) if (p.correct + p.incorrect) != 0 else 0.0, axis=1)" ] }, { "cell_type": "code", "execution_count": 110, "id": "closing-causing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P1111146803[../../allConstraintsAnalysis_WRemoved_Final/c...46803.000000
P19958911360[../../allConstraintsAnalysis_WRemoved_Final/c...127.640449
P450142522684[../../allConstraintsAnalysis_WRemoved_Final/c...53.374118
P76829715346[../../allConstraintsAnalysis_WRemoved_Final/c...51.670034
P2757295163856[../../allConstraintsAnalysis_WRemoved_Final/c...22.461412
P2306115[../../allConstraintsAnalysis_WRemoved_Final/c...15.000000
P39123123775[../../allConstraintsAnalysis_WRemoved_Final/c...12.099359
P2248402541608[../../allConstraintsAnalysis_WRemoved_Final/c...10.337391
P2303440[../../allConstraintsAnalysis_WRemoved_Final/c...10.000000
P2325407940619[../../allConstraintsAnalysis_WRemoved_Final/c...9.958078
P272015140[../../allConstraintsAnalysis_WRemoved_Final/c...9.333333
P2243403536870[../../allConstraintsAnalysis_WRemoved_Final/c...9.137546
P2244403636801[../../allConstraintsAnalysis_WRemoved_Final/c...9.118186
P34485794620[../../allConstraintsAnalysis_WRemoved_Final/c...7.979275
P11836835394[../../allConstraintsAnalysis_WRemoved_Final/c...7.897511
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1111 1 46803 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1995 89 11360 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P4501 425 22684 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P768 297 15346 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P275 7295 163856 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2306 1 15 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P3912 312 3775 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2248 4025 41608 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2303 4 40 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2325 4079 40619 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2720 15 140 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2243 4035 36870 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2244 4036 36801 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P3448 579 4620 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1183 683 5394 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "\n", " violation_ratio \n", "P1111 46803.000000 \n", "P1995 127.640449 \n", "P4501 53.374118 \n", "P768 51.670034 \n", "P275 22.461412 \n", "P2306 15.000000 \n", "P3912 12.099359 \n", "P2248 10.337391 \n", "P2303 10.000000 \n", "P2325 9.958078 \n", "P2720 9.333333 \n", "P2243 9.137546 \n", "P2244 9.118186 \n", "P3448 7.979275 \n", "P1183 7.897511 " ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 112, "id": "brief-effect", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P22142989348721476[../../allConstraintsAnalysis_WRemoved_Final/c...0.241349
P43331113458436995[../../allConstraintsAnalysis_WRemoved_Final/c...0.014045
P2757295163856[../../allConstraintsAnalysis_WRemoved_Final/c...22.461412
P2860174842113152425[../../allConstraintsAnalysis_WRemoved_Final/c...0.000872
P1435212163792401[../../allConstraintsAnalysis_WRemoved_Final/c...0.043552
P7084571857276[../../allConstraintsAnalysis_WRemoved_Final/c...1.252811
P19712201852917[../../allConstraintsAnalysis_WRemoved_Final/c...0.433682
P15984060049329[../../allConstraintsAnalysis_WRemoved_Final/c...1.215000
P1111146803[../../allConstraintsAnalysis_WRemoved_Final/c...46803.000000
P856145194944006[../../allConstraintsAnalysis_WRemoved_Final/c...0.030308
P2248402541608[../../allConstraintsAnalysis_WRemoved_Final/c...10.337391
P2325407940619[../../allConstraintsAnalysis_WRemoved_Final/c...9.958078
P2243403536870[../../allConstraintsAnalysis_WRemoved_Final/c...9.137546
P2244403636801[../../allConstraintsAnalysis_WRemoved_Final/c...9.118186
P41338914535536[../../allConstraintsAnalysis_WRemoved_Final/c...0.091318
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2214 2989348 721476 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P433 31113458 436995 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P275 7295 163856 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2860 174842113 152425 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1435 2121637 92401 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P708 45718 57276 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P197 122018 52917 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1598 40600 49329 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P1111 1 46803 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P856 1451949 44006 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2248 4025 41608 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2325 4079 40619 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2243 4035 36870 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P2244 4036 36801 [../../allConstraintsAnalysis_WRemoved_Final/c... \n", "P413 389145 35536 [../../allConstraintsAnalysis_WRemoved_Final/c... 
\n", "\n", " violation_ratio \n", "P2214 0.241349 \n", "P433 0.014045 \n", "P275 22.461412 \n", "P2860 0.000872 \n", "P1435 0.043552 \n", "P708 1.252811 \n", "P197 0.433682 \n", "P1598 1.215000 \n", "P1111 46803.000000 \n", "P856 0.030308 \n", "P2248 10.337391 \n", "P2325 9.958078 \n", "P2243 9.137546 \n", "P2244 9.118186 \n", "P413 0.091318 " ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 113, "id": "wireless-passenger", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 424.000000\n", "mean 111.550177\n", "std 2272.909916\n", "min 0.000000\n", "25% 0.001966\n", "50% 0.036278\n", "75% 0.260204\n", "max 46803.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 114, "id": "civilian-arnold", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 4 - Violation Ratios')" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF4['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 4 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 115, "id": "threaded-cooler", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 4 - Violation Ratios <= 0.5')" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF4[codepConstDF4['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 4 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": null, "id": "olympic-charlotte", "metadata": {}, "outputs": [], "source": [ "# Threshold is the mean of the column itself; the previous literal (2.414703) did not match the mean reported by describe().\n", "ratioMean4 = codepConstDF4['violation_ratio'].mean()\n", "print(f\"No. of properties whose violation ratio is greater than mean: {(codepConstDF4['violation_ratio'] > ratioMean4).sum()}/{len(codepConstDF4)}\")" ] }, { "cell_type": "markdown", "id": "published-affiliate", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": null, "id": "aggregate-conservative", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "from tqdm.notebook import tqdm\n", "\n", "codepConstViolations = {}\n", "codepConstPropList = set()\n", "\n", "def extractTimes(filename):\n", "    \"\"\"Return the wall-clock durations, in seconds, recorded in one timing log.\n", "\n", "    Only lines of the form real TAB {minutes}m{seconds}s are parsed;\n", "    every other line in the log is skipped.\n", "    \"\"\"\n", "    times = []\n", "    with open(filename) as f:\n", "        for line in f:\n", "            line = line.strip()\n", "            # Anchor on the timing line itself: a bare substring test would also hit\n", "            # unrelated log lines containing \"real\" and then fail while parsing them.\n", "            if not line.startswith(\"real\\t\"):\n", "                continue\n", "            mins, sec = line.split(\"\\t\")[1].split(\"m\")\n", "            # sec still carries the trailing \"s\" unit, hence the slice.\n", "            times.append(60 * int(mins) + float(sec[:-1]))\n", "    return times\n", "\n", "times = []\n", "timesVersion = {\"MSN\": [], \"MN\": [], \"M\": [], \"N\": [], \"S\": []}\n", "filePath = '/data/wd-correctness/propertiesSplit_WRemoved_Final/checkViolations/exec_logs/'\n", "for filename in tqdm(os.listdir(filePath)):\n", "    if filename.startswith(\"timeLog_codepConst_\"):\n", "        ver = filename.split('_')[2]\n", "        tempTimes = extractTimes(filePath + filename)\n", "        times += tempTimes\n", "        # setdefault keeps the loop running if a log carries a version tag not listed above.\n", "        timesVersion.setdefault(ver, []).extend(tempTimes)\n", "print(pd.Series(times).describe())" ] }, { "cell_type": "code",
"execution_count": null, "id": "hearing-treasury", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['MSN']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "animal-vocabulary", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['MN']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "gentle-accessory", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['M']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "fresh-namibia", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['N']).describe())" ] }, { "cell_type": "markdown", "id": "industrial-parcel", "metadata": {}, "source": [ "## Symmetric Constraint (Q21510862)\n", "\n", "This constraint says that if a property carries this constraint, then every statement `(node1)-[prop]->(node2)` using that property must be accompanied by the inverse statement `(node2)-[prop]->(node1)`, apart from a few exceptions." ] }, { "cell_type": "markdown", "id": "silent-fundamentals", "metadata": {}, "source": [ "### Constraints File" ] }, { "cell_type": "code", "execution_count": 25, "id": "known-wednesday", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-15 07:17:19 query]: SQL Translation:\n", "---------------------------------------------\n", " SELECT *\n", " FROM graph_1 AS graph_1_c1\n", " WHERE graph_1_c1.\"label\"=?\n", " AND graph_1_c1.\"node2\"=?\n", " PARAS: ['P2302', 'Q21510862']\n", "---------------------------------------------\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->(:Q21510862)\" \\\n", " -o ../../constraintsOP/symmetricConstraint/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 26, "id":
"legal-diamond", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "qualiDF = pd.read_csv(\"../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz\",sep='\\t')\n", "constDF = pd.read_csv(\"../../constraintsOP/symmetricConstraint/claims.constraints_list.tsv\",sep='\\t')" ] }, { "cell_type": "code", "execution_count": 27, "id": "exceptional-morris", "metadata": {}, "outputs": [], "source": [ "constDF2 = constDF.set_index('id').join(qualiDF.set_index('node1'),rsuffix='_qualifier').drop(columns=['id', 'node2;wikidatatype_qualifier', 'rank', 'node2', 'label', 'node2;wikidatatype']).rename(columns={'label_qualifier':'label', 'node2_qualifier': 'node2'})\n", "constDF2 = constDF2.reset_index()\n", "constDF2 = constDF2.rename(columns={'index':'id'})\n", "constDF2['label'] = constDF2.label.fillna(\"P2316\")\n", "constDF2['node2'] = constDF2.node2.fillna(\"Normal\")" ] }, { "cell_type": "code", "execution_count": 28, "id": "burning-involvement", "metadata": {}, "outputs": [], "source": [ "constDF2.to_csv(\"../../constraintsOP/symmetricConstraint/claims.constraints_all.tsv\",sep=\"\\t\",index=False)" ] }, { "cell_type": "code", "execution_count": 29, "id": "naval-identification", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/symmetricConstraint/claims.constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 30, "id": "considered-madison", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 31, "id": "alone-cattle", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2316', 'P2303'], dtype=object)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 33, 
"id": "sensitive-alliance", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 34, "id": "tender-valley", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2316
node1id
P1322P1322-P2302-Q21510862-85dea891-0NaN[Normal]
P1327P1327-P2302-Q21510862-a3c3a094-0NaN[Normal]
P1382P1382-P2302-Q21510862-f6bcfecf-0NaN[Normal]
P1560P1560-P2302-Q21510862-fabecaeb-0NaN[Q21502408]
P1639P1639-P2302-Q21510862-384edcd4-0NaN[Q21502408]
\n", "
" ], "text/plain": [ "label P2303 P2316\n", "node1 id \n", "P1322 P1322-P2302-Q21510862-85dea891-0 NaN [Normal]\n", "P1327 P1327-P2302-Q21510862-a3c3a094-0 NaN [Normal]\n", "P1382 P1382-P2302-Q21510862-f6bcfecf-0 NaN [Normal]\n", "P1560 P1560-P2302-Q21510862-fabecaeb-0 NaN [Q21502408]\n", "P1639 P1639-P2302-Q21510862-384edcd4-0 NaN [Q21502408]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 35, "id": "cellular-canal", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 36, "id": "desperate-poster", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2316
node1
P1322NaN[Normal]
P1327NaN[Normal]
P1382NaN[Normal]
P1560NaN[Q21502408]
P1639NaN[Q21502408]
\n", "
" ], "text/plain": [ "label P2303 P2316\n", "node1 \n", "P1322 NaN [Normal]\n", "P1327 NaN [Normal]\n", "P1382 NaN [Normal]\n", "P1560 NaN [Q21502408]\n", "P1639 NaN [Q21502408]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "primary-netherlands", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 37, "id": "pointed-haven", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tqdm.notebook import tqdm\n", "import os.path\n", "import os\n", "\n", "cnt = 0\n", "fCnt = 0\n", "\n", "folderName = 'symmetricConstraint'\n", "shellFileSuffix = 'symmConst_Validator_'\n", "graph_cache_prefix = 'symm_new_11_1_'\n", "\n", "for row in tqdm(dfItemRequires.iterrows()):\n", "# try:\n", " prop = row[0]\n", " constraint = row[1]\n", " mandatory = []\n", " suggestion = []\n", " normal = []\n", " prop = str(prop)\n", " \n", " if type(constraint['P2316']) == list:\n", " if constraint['P2316'][0] == 'Q21502408':\n", " sfname = 'mandatory'\n", " elif constraint['P2316'][0] == 'Q62026391':\n", " sfname = 'suggestion'\n", " elif constraint['P2316'][0] == 'Normal':\n", " sfname = 'normal'\n", " else:\n", " sfname = 'normal'\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv\")):\n", " continue\n", " \n", " command = \"{ time ( kgtk --debug query -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".copy2.tsv \\\n", " --match 'tsv: (node1)-[nodeProp]->(node2), copy2: (node2)-[]->(node1)' \"\n", " \n", " os.system(\"cp ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv 
../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".copy2.tsv\")\n", " \n", " if cnt % 60 == 0:\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit_WRemoved_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " command\n", " if type(constraint['P2303']) != list: # Exceptions not present\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " else:\n", " excptns = constraint['P2303']\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile = \"-o 
../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".correct_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".correct_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for 
prop:\",prop)" ] }, { "cell_type": "code", "execution_count": 38, "id": "polar-canada", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "39" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 59, "id": "virtual-disney", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,3):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_WRemoved_Final/checkViolations/symmConst_Validator_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "coral-cheese", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 117, "id": "governmental-backup", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d0e1a54c683248f285687ab061bf42a0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3810b55aa13b4555b826a68f7b829338", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/13 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2152801[../../allConstraintsAnalysis_WRemoved_Final/s...0.012346
P1639209426[../../allConstraintsAnalysis_WRemoved_Final/s...0.012264
P1560345933[../../allConstraintsAnalysis_WRemoved_Final/s...0.009450
P336418061[../../allConstraintsAnalysis_WRemoved_Final/s...0.000553
P61852810[../../allConstraintsAnalysis_WRemoved_Final/s...0.000000
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P2152 80 1 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P1639 2094 26 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P1560 3459 33 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P3364 1806 1 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P6185 281 0 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "\n", " violation_ratio \n", "P2152 0.012346 \n", "P1639 0.012264 \n", "P1560 0.009450 \n", "P3364 0.000553 \n", "P6185 0.000000 " ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF1 = pd.DataFrame(symmConstViolations['mandatory']).T\n", "symmConstDF1['violation_ratio'] = symmConstDF1.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF1.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 123, "id": "gross-extraction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P278910455124812[../../allConstraintsAnalysis_WRemoved_Final/s...0.191801
P188953023430928[../../allConstraintsAnalysis_WRemoved_Final/s...0.055114
P1971713943538[../../allConstraintsAnalysis_WRemoved_Final/s...0.020225
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2789 104551 24812 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P1889 530234 30928 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P197 171394 3538 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "\n", " violation_ratio \n", "P2789 0.191801 \n", "P1889 0.055114 \n", "P197 0.020225 " ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF2 = pd.DataFrame(symmConstViolations['suggestion']).T\n", "symmConstDF2['violation_ratio'] = symmConstDF2.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 124, "id": "heavy-scout", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P597401[../../allConstraintsAnalysis_WRemoved_Final/s...1.000000
P518805[../../allConstraintsAnalysis_WRemoved_Final/s...1.000000
P17061297[../../allConstraintsAnalysis_WRemoved_Final/s...0.889908
P5214211336[../../allConstraintsAnalysis_WRemoved_Final/s...0.760387
P2652498911[../../allConstraintsAnalysis_WRemoved_Final/s...0.646558
P68475231787524749[../../allConstraintsAnalysis_WRemoved_Final/s...0.500052
P870262[../../allConstraintsAnalysis_WRemoved_Final/s...0.250000
P1382108372911[../../allConstraintsAnalysis_WRemoved_Final/s...0.211740
P30321730358[../../allConstraintsAnalysis_WRemoved_Final/s...0.171456
P2293124922321[../../allConstraintsAnalysis_WRemoved_Final/s...0.156687
P451109351285[../../allConstraintsAnalysis_WRemoved_Final/s...0.105155
P13277908860[../../allConstraintsAnalysis_WRemoved_Final/s...0.098084
P4545464[../../allConstraintsAnalysis_WRemoved_Final/s...0.080000
P491532828[../../allConstraintsAnalysis_WRemoved_Final/s...0.078652
P5306730484[../../allConstraintsAnalysis_WRemoved_Final/s...0.067092
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5974 0 1 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P5188 0 5 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P1706 12 97 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P521 421 1336 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P2652 498 911 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P684 7523178 7524749 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P8702 6 2 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P1382 10837 2911 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P3032 1730 358 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P2293 12492 2321 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P451 10935 1285 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P1327 7908 860 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P4545 46 4 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P4915 328 28 [../../allConstraintsAnalysis_WRemoved_Final/s... \n", "P530 6730 484 [../../allConstraintsAnalysis_WRemoved_Final/s... 
\n", "\n", " violation_ratio \n", "P5974 1.000000 \n", "P5188 1.000000 \n", "P1706 0.889908 \n", "P521 0.760387 \n", "P2652 0.646558 \n", "P684 0.500052 \n", "P8702 0.250000 \n", "P1382 0.211740 \n", "P3032 0.171456 \n", "P2293 0.156687 \n", "P451 0.105155 \n", "P1327 0.098084 \n", "P4545 0.080000 \n", "P4915 0.078652 \n", "P530 0.067092 " ] }, "execution_count": 124, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF3 = pd.DataFrame(symmConstViolations['normal']).T\n", "symmConstDF3['violation_ratio'] = symmConstDF3.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 126, "id": "legitimate-aspect", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Symmetric Normal Constraint - Violation Ratios')" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "symmConstDF3.sort_values(by=['violation_ratio'],ascending=False)['violation_ratio'].plot.hist(bins=100).set_title(\"Symmetric Normal Constraint - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 127, "id": "junior-marketing", "metadata": {}, "outputs": [], "source": [ "pd.concat([symmConstDF1, symmConstDF2, symmConstDF3]).to_csv('../../allConstraintsAnalysis_WRemoved_Final/symmConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "unlikely-sewing", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": null, "id": "southern-reasoning", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "from tqdm.notebook import tqdm\n", "\n", "codepConstViolations = {}\n", "\n", "codepConstViolations = {}\n", "codepConstPropList = set()\n", "\n", "def extractTimes(filename):\n", " times = []\n", " with open(filename) as f:\n", " for line in f:\n", " if \"real\" in line:\n", " line = line.strip()\n", " time1 = line.split(\"\\t\")[1]\n", " mins, sec = time1.split(\"m\")\n", " mins = int(mins)\n", " sec = float(sec[:-1])\n", " times.append(60 * mins + sec)\n", " return times\n", "\n", "# codepConstViolationsSummary = {}\n", "times = []\n", "filePath = '/data/wd-correctness/propertiesSplit_WRemoved_Final/checkViolations/exec_logs/'\n", "for filename in tqdm(os.listdir(filePath)):\n", " if filename.startswith(\"symmConst_Validator_\"):\n", " tempTimes = extractTimes(filePath + filename)\n", " times += tempTimes" ] }, { "cell_type": "code", "execution_count": null, "id": "classical-updating", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(times).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "fundamental-bottle", "metadata": {}, "outputs": [], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of 
times (in s) taken for symmetric constraint checks\")" ] }, { "cell_type": "markdown", "id": "informed-animal", "metadata": {}, "source": [ "## Inverse Constraint (Q21510855)\n", "\n", "This constraint says, if node1 has a property with this constraint, then node2 must have the specified property and the value must be node1." ] }, { "cell_type": "markdown", "id": "dramatic-manchester", "metadata": {}, "source": [ "### Constraints File" ] }, { "cell_type": "code", "execution_count": 39, "id": "leading-server", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-15 07:17:37 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " AND graph_1_c1.\"node2\"=?\r\n", " PARAS: ['P2302', 'Q21510855']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->(:Q21510855)\" \\\n", " -o ../../constraintsOP/inverseConstraint/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 40, "id": "offshore-sudan", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "P1026-P2302-Q21510855-adc83b86-0\tP1026\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1029-P2302-Q21510855-6b55e057-0\tP1029\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P115-P2302-Q21510855-f7aa0b78-0\tP115\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1151-P2302-Q21510855-0d9aa9c6-0\tP1151\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1204-P2302-Q21510855-e3d53bb6-0\tP1204\tP2302\tQ21510855\tnormal\twikibase-item\r\n", 
"P1283-P2302-Q21510855-0e7699bb-0\tP1283\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1308-P2302-Q21510855-2aba96b7-0\tP1308\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1365-P2302-Q21510855-c809b758-0\tP1365\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1366-P2302-Q21510855-eee12ef8-0\tP1366\tP2302\tQ21510855\tnormal\twikibase-item\r\n" ] } ], "source": [ "!head ../../constraintsOP/inverseConstraint/claims.constraints_list.tsv" ] }, { "cell_type": "code", "execution_count": 41, "id": "received-colonial", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "qualiDF = pd.read_csv(\"../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz\",sep='\\t')\n", "constDF = pd.read_csv(\"../../constraintsOP/inverseConstraint/claims.constraints_list.tsv\",sep='\\t')" ] }, { "cell_type": "code", "execution_count": 42, "id": "overall-expense", "metadata": {}, "outputs": [], "source": [ "constDF2 = constDF.set_index('id').join(qualiDF.set_index('node1'),rsuffix='_qualifier').drop(columns=['id', 'node2;wikidatatype_qualifier', 'rank', 'node2', 'label', 'node2;wikidatatype']).rename(columns={'label_qualifier':'label', 'node2_qualifier': 'node2'})\n", "constDF2 = constDF2.reset_index()\n", "constDF2 = constDF2.rename(columns={'index':'id'})\n", "constDF2['label'] = constDF2.label.fillna(\"P2316\")\n", "constDF2['node2'] = constDF2.node2.fillna(\"Normal\")" ] }, { "cell_type": "code", "execution_count": 43, "id": "valid-throat", "metadata": {}, "outputs": [], "source": [ "constDF2.to_csv(\"../../constraintsOP/inverseConstraint/claims.constraints_all.tsv\",sep=\"\\t\",index=False)" ] }, { "cell_type": "code", "execution_count": 44, "id": "focused-pennsylvania", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/inverseConstraint/claims.constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 45, "id": "moved-rental", "metadata": {}, "outputs": 
[], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 46, "id": "attached-rings", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2306', 'P2316', 'P4155', 'P2303'], dtype=object)" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 48, "id": "local-forty", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 49, "id": "pressed-upset", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2306P2316P4155
node1id
P1026P1026-P2302-Q21510855-adc83b86-0NaN[P50]NaNNaN
P1029P1029-P2302-Q21510855-6b55e057-0NaN[P5096]NaNNaN
P115P115-P2302-Q21510855-f7aa0b78-0NaN[P466]NaNNaN
P1151P1151-P2302-Q21510855-0d9aa9c6-0NaN[P1204][Q21502408]NaN
P1204P1204-P2302-Q21510855-e3d53bb6-0NaN[P1151]NaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2306 P2316 P4155\n", "node1 id \n", "P1026 P1026-P2302-Q21510855-adc83b86-0 NaN [P50] NaN NaN\n", "P1029 P1029-P2302-Q21510855-6b55e057-0 NaN [P5096] NaN NaN\n", "P115 P115-P2302-Q21510855-f7aa0b78-0 NaN [P466] NaN NaN\n", "P1151 P1151-P2302-Q21510855-0d9aa9c6-0 NaN [P1204] [Q21502408] NaN\n", "P1204 P1204-P2302-Q21510855-e3d53bb6-0 NaN [P1151] NaN NaN" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 50, "id": "extra-stomach", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 51, "id": "seeing-marine", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2306P2316P4155
node1
P1026NaN[P50]NaNNaN
P1029NaN[P5096]NaNNaN
P115NaN[P466]NaNNaN
P1151NaN[P1204][Q21502408]NaN
P1204NaN[P1151]NaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2306 P2316 P4155\n", "node1 \n", "P1026 NaN [P50] NaN NaN\n", "P1029 NaN [P5096] NaN NaN\n", "P115 NaN [P466] NaN NaN\n", "P1151 NaN [P1204] [Q21502408] NaN\n", "P1204 NaN [P1151] NaN NaN" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "composite-cutting", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 52, "id": "acoustic-belarus", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "13b5b09ee42643d687f6eff8bf93dfcd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tqdm.notebook import tqdm\n", "import os.path\n", "import os\n", "\n", "cnt = 0\n", "fCnt = 0\n", "fOP = None\n", "\n", "folderName = 'inverseConstraint'\n", "shellFileSuffix = 'invConst_Validator_'\n", "graph_cache_file_prefix = \"inv_new_11_1_\"\n", "\n", "for prop, constraint in tqdm(dfItemRequires.iterrows()):\n", "# try:\n", " \n", " if type(constraint['P2316']) == list:\n", " if constraint['P2316'][0] == 'Q21502408':\n", " subFolderName = \"mandatory\"\n", " elif constraint['P2316'][0] == 'Q62026391':\n", " subFolderName = \"suggestion\"\n", " else:\n", " subFolderName = \"normal\"\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv\")):\n", " continue\n", " \n", " prop2 = constraint['P2306']\n", "\n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", "\n", " if not(os.path.isfile(\"../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit_WRemoved_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = 
open(\"../../propertiesSplit_WRemoved_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " \n", " command = \"{ time ( kgtk --debug query -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " ../../propertiesSplit_WRemoved_Final/claims.\"+ prop2 +\".tsv \\\n", " --match '\"+ \\\n", " f\"{prop}: (node1)-[nodeProp]->(node2), {prop2}: (node2)-[]->(node1)' \"\n", "\n", " if type(constraint['P2303']) != list: # Exceptions not present\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " else:\n", " excptns = set(constraint['P2303'])\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_WRemoved_Final/claims.\"+ prop +\".tsv \\\n", " 
--filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", "# print(command) \n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_WRemoved_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop 
+\".correct.tsv ) } 2>> ../../propertiesSplit_WRemoved_Final/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 53, "id": "large-climb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "110" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 60, "id": "involved-vietnamese", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,7):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_WRemoved_Final/checkViolations/invConst_Validator_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "retired-audio", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 129, "id": "specified-evanescence", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e5f9d7c3cc9b4c5bb69fa64c89494ed0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "10896285c1ae44658231d4707c063b4a", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/12 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P267383970[../../allConstraintsAnalysis_WRemoved_Final/i...0.077008
P41472868[../../allConstraintsAnalysis_WRemoved_Final/i...0.027211
P450177930[../../allConstraintsAnalysis_WRemoved_Final/i...0.016584
P1151180330[../../allConstraintsAnalysis_WRemoved_Final/i...0.016367
P2033187928[../../allConstraintsAnalysis_WRemoved_Final/i...0.014683
P41492864[../../allConstraintsAnalysis_WRemoved_Final/i...0.013793
# Build a table from the 'mandatory' entry of invConstViolations; the transpose
# puts one entry per row (the rendered output shows property ids as the row index).
invConstDF1 = pd.DataFrame(invConstViolations['mandatory']).transpose()

# violation_ratio = incorrect / (correct + incorrect), evaluated row by row.
invConstDF1['violation_ratio'] = invConstDF1.apply(
    lambda row: row['incorrect'] / (row['correct'] + row['incorrect']),
    axis='columns',
)

# Display the 15 rows with the largest violation ratio.
invConstDF1.sort_values(by='violation_ratio', ascending=False).head(15)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P143436775071[../../allConstraintsAnalysis_WRemoved_Final/i...0.579675
P155103664756650[../../allConstraintsAnalysis_WRemoved_Final/i...0.051816
P156103663644187[../../allConstraintsAnalysis_WRemoved_Final/i...0.040883
P629740301108[../../allConstraintsAnalysis_WRemoved_Final/i...0.014746
# Build a table from the 'suggestion' entry of invConstViolations; the transpose
# puts one entry per row (the rendered output shows property ids as the row index).
invConstDF2 = pd.DataFrame(invConstViolations['suggestion']).transpose()

# violation_ratio = incorrect / (correct + incorrect), evaluated row by row.
invConstDF2['violation_ratio'] = invConstDF2.apply(
    lambda row: row['incorrect'] / (row['correct'] + row['incorrect']),
    axis='columns',
)

# Display the 15 rows with the largest violation ratio.
invConstDF2.sort_values(by='violation_ratio', ascending=False).head(15)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P160513191[../../allConstraintsAnalysis_WRemoved_Final/i...0.936275
P34486054595[../../allConstraintsAnalysis_WRemoved_Final/i...0.883654
P92616[../../allConstraintsAnalysis_WRemoved_Final/i...0.857143
P10294902662[../../allConstraintsAnalysis_WRemoved_Final/i...0.844543
P92515[../../allConstraintsAnalysis_WRemoved_Final/i...0.833333
P115694825081[../../allConstraintsAnalysis_WRemoved_Final/i...0.783072
P51342354[../../allConstraintsAnalysis_WRemoved_Final/i...0.701299
P38161427[../../allConstraintsAnalysis_WRemoved_Final/i...0.658537
P128314052479[../../allConstraintsAnalysis_WRemoved_Final/i...0.638260
P8625915[../../allConstraintsAnalysis_WRemoved_Final/i...0.625000
P42527413337[../../allConstraintsAnalysis_WRemoved_Final/i...0.549029
P51328190[../../allConstraintsAnalysis_WRemoved_Final/i...0.526316
P2512221172[../../allConstraintsAnalysis_WRemoved_Final/i...0.437659
P167764[../../allConstraintsAnalysis_WRemoved_Final/i...0.400000
P25781111723[../../allConstraintsAnalysis_WRemoved_Final/i...0.394220
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1605 13 191 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P3448 605 4595 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P926 1 6 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P1029 490 2662 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P925 1 5 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P115 6948 25081 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P5134 23 54 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P3816 14 27 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P1283 1405 2479 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P8625 9 15 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P425 2741 3337 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P5132 81 90 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P2512 221 172 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P1677 6 4 [../../allConstraintsAnalysis_WRemoved_Final/i... \n", "P2578 1111 723 [../../allConstraintsAnalysis_WRemoved_Final/i... 
def extractTimes(filename):
    """Collect the wall-clock durations recorded in a shell ``time`` log.

    Only lines made of exactly two whitespace-separated fields, the label
    ``real`` followed by ``<minutes>m<seconds>s`` (e.g. ``real\t3m12.345s``),
    are treated as timing records. Any other line is skipped, including lines
    that merely contain the substring "real" somewhere (the logs read here
    also receive the stderr of the timed commands), instead of raising
    IndexError / ValueError while being parsed.

    Args:
        filename: path of the log file to scan.

    Returns:
        list of float: one duration in seconds per timing record, in file
        order. Empty when the file holds no timing record.
    """
    times = []
    with open(filename) as f:
        for line in f:
            fields = line.split()
            # A timing record is exactly: "real" + "<minutes>m<seconds>s".
            if len(fields) != 2 or fields[0] != "real":
                continue
            mins, sep, secs = fields[1].partition("m")
            if not sep or not secs.endswith("s"):
                continue
            try:
                # Drop the trailing "s" before converting the seconds part.
                times.append(60 * int(mins) + float(secs[:-1]))
            except ValueError:
                # Shaped like a timing record but not numeric: ignore it.
                continue
    return times
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

# Folder holding the per-constraint summary CSVs read below.
ANALYSIS_DIR = "../../allConstraintsAnalysis_WRemoved_Final/"


def loadConstAnalysis(csvName):
    """Read one summary CSV from ANALYSIS_DIR and index it by its first column.

    The first column is used as the index and is also left in place as a
    regular column.

    Args:
        csvName: file name inside ANALYSIS_DIR.

    Returns:
        pandas.DataFrame indexed by the values of its first column.
    """
    frame = pd.read_csv(ANALYSIS_DIR + csvName)
    return frame.set_index(frame.iloc[:, 0])


def rankViolationRatios(constDF, suffix):
    """Return `violation_ratio` sorted ascending and indexed by percent rank.

    Args:
        constDF: frame containing a `violation_ratio` column.
        suffix: text appended to "violation_ratio" to name the result,
            e.g. "_type_const".

    Returns:
        pandas.Series named "violation_ratio" + suffix. Its index, named
        "index", holds (position + 1) * 100 / n for the sorted values.
    """
    column = "violation_ratio" + suffix
    ratios = constDF["violation_ratio"].rename(column).sort_values()
    ranked = ratios.rename_axis("property").reset_index().reset_index()
    # Position in the sorted order expressed as a percentage of all rows.
    ranked["index"] = (ranked["index"] + 1) * 100 / len(ranked)
    return ranked.set_index("index")[column]


def violationRatioPercentiles(ratios):
    """Return the 1st..100th percentiles of `ratios`, each multiplied by 100.

    np.percentile only looks at the values, so the index of `ratios` does not
    influence the result.
    """
    return [np.percentile(ratios, q) * 100 for q in range(1, 101)]


# Summary tables, one per constraint family.
typeConstDF = loadConstAnalysis("typeConstDFAnalysis.csv")
valTypeConstDF = loadConstAnalysis("valueTypeConstDFAnalysis.csv")
codepConstDF1 = loadConstAnalysis("codepConstDFAnalysis.csv")
symmConstDF = loadConstAnalysis("symmConstDFAnalysis.csv")
invConstDF = loadConstAnalysis("invConstDFAnalysis.csv")

# Sorted violation ratios, indexed by percent rank. All five series are now
# named consistently; previously the type series alone was unnamed and had to
# be selected as column 0.
typeConstDF1 = rankViolationRatios(typeConstDF, "_type_const")
valTypeConstDF1 = rankViolationRatios(valTypeConstDF, "_valuetype_const")
codepConstDF1_1 = rankViolationRatios(codepConstDF1, "_codep_const")
symmConstDF1_1 = rankViolationRatios(symmConstDF, "_symm_const")
invConstDF1_1 = rankViolationRatios(invConstDF, "_inv_const")

# Percentile curves (in %), one list of 100 values per constraint family.
typeConstDF2 = violationRatioPercentiles(typeConstDF1)
valTypeConstDF2 = violationRatioPercentiles(valTypeConstDF1)
codepConstDF1_2 = violationRatioPercentiles(codepConstDF1_1)
symmConstDF1_2 = violationRatioPercentiles(symmConstDF1_1)
invConstDF1_2 = violationRatioPercentiles(invConstDF1_1)

# Long format: one row per (percentile index, constraint) pair with its value in 'VR'.
constAnalysisDF = pd.DataFrame({
    'index': list(range(1, 101)),
    'type': typeConstDF2,
    'value type': valTypeConstDF2,
    'irs': codepConstDF1_2,
    'symmetric': symmConstDF1_2,
    'inverse': invConstDF1_2,
}).melt('index', var_name='constraint', value_name='VR')
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexconstraintVR
01type0.0
12type0.0
23type0.0
34type0.0
45type0.0
# --- cell: preview the first rows of the long-format table ---
constAnalysisDF.head()

# --- cell: one curve per constraint, percentile index on x, 'VR' on y ---
ax = plt.figure(figsize=(10, 6)).add_subplot()
sns.lineplot(data=constAnalysisDF, x='index', y='VR', hue='constraint', ax=ax)
ax.set(xlabel="Proportion of properties (in %)", ylabel="Violation Ratio (in %)")