{ "cells": [ { "cell_type": "markdown", "id": "korean-footage", "metadata": {}, "source": [ "# Comprehensive Constraints Analysis - Final\n", "\n", "In this notebook, for the original dataset, the violations are determined in total." ] }, { "cell_type": "code", "execution_count": 2, "id": "juvenile-ability", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8f09d8d199d445fbb9e4ed86e3bb148e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1149471184 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
0P1001P2308[Q102496, Q105985, Q1140371, Q1151067, Q119768...
1P1001P2309[Q30208840]
2P1002P2308[Q630010]
3P1002P2309[Q21514624]
4P1004P2308[Q2221906, Q23413, Q3947, Q41176, Q88291]
\n", "" ], "text/plain": [ " node1 label node2\n", "0 P1001 P2308 [Q102496, Q105985, Q1140371, Q1151067, Q119768...\n", "1 P1001 P2309 [Q30208840]\n", "2 P1002 P2308 [Q630010]\n", "3 P1002 P2309 [Q21514624]\n", "4 P1004 P2308 [Q2221906, Q23413, Q3947, Q41176, Q88291]" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.head()" ] }, { "cell_type": "code", "execution_count": 107, "id": "still-trail", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
9318P8138P2308[Q27096213]
9319P8138P2309[Q21514624]
\n", "
" ], "text/plain": [ " node1 label node2\n", "9318 P8138 P2308 [Q27096213]\n", "9319 P8138 P2309 [Q21514624]" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1[df1['node1'] == 'P8138']" ] }, { "cell_type": "markdown", "id": "solid-browser", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 111, "id": "bright-impossible", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f49fcb6a808d4cdc962dabc196bda584", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4810 [00:00(node2), \" + parentFile + \": (node1)-[]->(nodex), P279star: (nodex)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " --graph-cache ~/sqlite3_caches/type_new_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv ;\\\n", " kgtk --debug query -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv \\\n", " ../../wikidata-20210215/derived.\" + parentFile + \".tsv.gz \\\n", " --match 'm: (node1)-[nodeProp]->(node2), \" + parentFile + \": (node1)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + 
exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " --graph-cache ~/sqlite3_caches/type_new_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv ; \\\n", " kgtk --debug cat -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv ; \\\n", " ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\")\n", "\n", " cnt += 1\n", " except:\n", " print(\"Something failed for prop:\",prop)\n", "\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 112, "id": "electrical-agreement", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1456" ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 113, "id": "outside-stupid", "metadata": {}, "outputs": [], "source": [ "# 
import os\n", "# for i in range(1,14):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_Final/checkViolations/typeConstraintValidator\"+str(i)+\".sh\")\n", " " ] }, { "cell_type": "markdown", "id": "competitive-canvas", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 151, "id": "casual-perth", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6360937a1b564337844cf1a476654013", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a3ff347183364768928afec922878a66", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/825 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P6213545[../../allConstraintsAnalysis_Final/typeConstr...0.013928
P302237430[../../allConstraintsAnalysis_Final/typeConstr...0.000000
P543790[../../allConstraintsAnalysis_Final/typeConstr...0.000000
P7204030[../../allConstraintsAnalysis_Final/typeConstr...0.000000
P131210547[../../allConstraintsAnalysis_Final/typeConstr...0.006598
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P621 354 5 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P3022 3743 0 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P543 79 0 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P720 403 0 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P1312 1054 7 [../../allConstraintsAnalysis_Final/typeConstr... \n", "\n", " violation_ratio \n", "P621 0.013928 \n", "P3022 0.000000 \n", "P543 0.000000 \n", "P720 0.000000 \n", "P1312 0.006598 " ] }, "execution_count": 156, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF.head()" ] }, { "cell_type": "code", "execution_count": 157, "id": "competitive-peeing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P81380461[../../allConstraintsAnalysis_Final/typeConstr...1.0
P5051064[../../allConstraintsAnalysis_Final/typeConstr...1.0
P2303039[../../allConstraintsAnalysis_Final/typeConstr...1.0
P1227019[../../allConstraintsAnalysis_Final/typeConstr...1.0
P2308017[../../allConstraintsAnalysis_Final/typeConstr...1.0
P6001016[../../allConstraintsAnalysis_Final/typeConstr...1.0
P8738014[../../allConstraintsAnalysis_Final/typeConstr...1.0
P538010[../../allConstraintsAnalysis_Final/typeConstr...1.0
P800406[../../allConstraintsAnalysis_Final/typeConstr...1.0
P558904[../../allConstraintsAnalysis_Final/typeConstr...1.0
P651004[../../allConstraintsAnalysis_Final/typeConstr...1.0
P717403[../../allConstraintsAnalysis_Final/typeConstr...1.0
P81703[../../allConstraintsAnalysis_Final/typeConstr...1.0
P488203[../../allConstraintsAnalysis_Final/typeConstr...1.0
P601403[../../allConstraintsAnalysis_Final/typeConstr...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P8138 0 461 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P5051 0 64 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P2303 0 39 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P1227 0 19 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P2308 0 17 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P6001 0 16 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P8738 0 14 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P538 0 10 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P8004 0 6 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P5589 0 4 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P6510 0 4 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P7174 0 3 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P817 0 3 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P4882 0 3 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P6014 0 3 [../../allConstraintsAnalysis_Final/typeConstr... 
\n", "\n", " violation_ratio \n", "P8138 1.0 \n", "P5051 1.0 \n", "P2303 1.0 \n", "P1227 1.0 \n", "P2308 1.0 \n", "P6001 1.0 \n", "P8738 1.0 \n", "P538 1.0 \n", "P8004 1.0 \n", "P5589 1.0 \n", "P6510 1.0 \n", "P7174 1.0 \n", "P817 1.0 \n", "P4882 1.0 \n", "P6014 1.0 " ] }, "execution_count": 157, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF.sort_values(by=['violation_ratio','incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 160, "id": "clinical-lawsuit", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 1456.000000\n", "mean 0.100094\n", "std 0.222405\n", "min 0.000000\n", "25% 0.000708\n", "50% 0.009036\n", "75% 0.061943\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 161, "id": "wanted-domestic", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Type Constraint Violation Ratios')" ] }, "execution_count": 161, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "typeConstDF['violation_ratio'].plot.hist(bins=100).set_title(\"Type Constraint Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 162, "id": "sufficient-hollywood", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Type Constraint Violation Ratios (<=0.05)')" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "typeConstDF[typeConstDF['violation_ratio'] <= 0.05].violation_ratio.plot.hist(bins=100).set_title(\"Type Constraint Violation Ratios (<=0.05)\")" ] }, { "cell_type": "code", "execution_count": null, "id": "minor-marshall", "metadata": {}, "outputs": [], "source": [ "# Compare against the column's actual mean. Violation ratios lie in [0, 1], so the\n", "# previous hard-coded threshold (5.286054) could never match and always reported 0.\n", "print(f\"No. of constraints whose violation ratio is greater than mean :{sum(typeConstDF['violation_ratio'] > typeConstDF['violation_ratio'].mean())}/{len(typeConstDF)}\")" ] }, { "cell_type": "code", "execution_count": 166, "id": "revolutionary-violence", "metadata": {}, "outputs": [], "source": [ "for key1 in typeConstViolations.keys():\n", " typeConstViolations[key1]['correct'] = typeConstViolations[key1]['instanceOf']['correct'] + typeConstViolations[key1]['subclass']['correct'] + typeConstViolations[key1]['instanceOfOrSubclass']['correct']\n", " typeConstViolations[key1]['incorrect'] = typeConstViolations[key1]['instanceOf']['incorrect'] + typeConstViolations[key1]['subclass']['incorrect'] + typeConstViolations[key1]['instanceOfOrSubclass']['incorrect']\n", " typeConstViolations[key1]['VR'] = typeConstViolations[key1]['incorrect'] / (typeConstViolations[key1]['correct'] + typeConstViolations[key1]['incorrect'])\n", " " ] }, { "cell_type": "code", "execution_count": 167, "id": "emotional-favorite", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "{'mandatory': {'instanceOf': {'correct': 44771087, 'incorrect': 37435},\n", " 'subclass': {'correct': 1966, 'incorrect': 30},\n", " 'instanceOfOrSubclass': {'correct': 221394, 'incorrect': 204},\n", " 'propCount': 165,\n", " 'correct': 44994447,\n", " 'incorrect': 37669,\n", " 'VR': 0.0008364918939185536},\n", " 'suggestion': {'instanceOf': {'correct': 61720, 'incorrect': 18709},\n", " 'subclass': {'correct': 0, 'incorrect': 
0},\n", " 'instanceOfOrSubclass': {'correct': 23314, 'incorrect': 2939},\n", " 'propCount': 11,\n", " 'correct': 85034,\n", " 'incorrect': 21648,\n", " 'VR': 0.20292083013066872},\n", " 'normal': {'instanceOf': {'correct': 398242129, 'incorrect': 3281243},\n", " 'subclass': {'correct': 96605, 'incorrect': 9501},\n", " 'instanceOfOrSubclass': {'correct': 66370309, 'incorrect': 285585},\n", " 'propCount': 1280,\n", " 'correct': 464709043,\n", " 'incorrect': 3576329,\n", " 'VR': 0.007637071781093346}}" ] }, "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstViolations" ] }, { "cell_type": "code", "execution_count": 168, "id": "aggregate-impact", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratiototal
P2093134805165543246[../../allConstraintsAnalysis_Final/typeConstr...0.004014135348411
P1476398981421858292[../../allConstraintsAnalysis_Final/typeConstr...0.04450341756434
P5773885531433977[../../allConstraintsAnalysis_Final/typeConstr...0.00087438889291
P14333672328312050[../../allConstraintsAnalysis_Final/typeConstr...0.00032836735333
P12153309919524104[../../allConstraintsAnalysis_Final/typeConstr...0.00072833123299
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2093 134805165 543246 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P1476 39898142 1858292 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P577 38855314 33977 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P1433 36723283 12050 [../../allConstraintsAnalysis_Final/typeConstr... \n", "P1215 33099195 24104 [../../allConstraintsAnalysis_Final/typeConstr... \n", "\n", " violation_ratio total \n", "P2093 0.004014 135348411 \n", "P1476 0.044503 41756434 \n", "P577 0.000874 38889291 \n", "P1433 0.000328 36735333 \n", "P1215 0.000728 33123299 " ] }, "execution_count": 168, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF['total'] = typeConstDF['correct'] + typeConstDF['incorrect']\n", "typeConstDF.sort_values(by=['total'],ascending=False).head()" ] }, { "cell_type": "code", "execution_count": 169, "id": "grateful-telling", "metadata": {}, "outputs": [], "source": [ "typeConstDF.to_csv('../../allConstraintsAnalysis_Final/typeConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "bearing-kruger", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 124, "id": "veterinary-fault", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9b0ae1938e10429e8c42a485d82b5dcf", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/52 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for type constraint checks\")" ] }, { "cell_type": "markdown", "id": "intense-computer", "metadata": {}, "source": [ "## Value Type Constraint\n", "\n", "Here, the constraint indicates that node2 must be an instance of or subclass of the specified class" ] }, { "cell_type": "markdown", "id": "animated-companion", 
"metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 145, "id": "static-profit", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "dfValueType = pd.read_csv('../../constraintsOP/valuetypeConstraint/claims.type-constraints_all1.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 146, "id": "worthy-malawi", "metadata": {}, "outputs": [], "source": [ "dfValueType = dfValueType.groupby(['node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 147, "id": "eleven-tiffany", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
0P1000P2308[Q1241356]
1P1000P2309[Q30208840]
2P1001P2308[Q20926517, Q2881272, Q2882257, Q3624078, Q389...
3P1001P2309[Q30208840]
4P1002P2308[Q2576663]
\n", "
" ], "text/plain": [ " node1 label node2\n", "0 P1000 P2308 [Q1241356]\n", "1 P1000 P2309 [Q30208840]\n", "2 P1001 P2308 [Q20926517, Q2881272, Q2882257, Q3624078, Q389...\n", "3 P1001 P2309 [Q30208840]\n", "4 P1002 P2308 [Q2576663]" ] }, "execution_count": 147, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType.head()" ] }, { "cell_type": "code", "execution_count": 148, "id": "expired-stuff", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2308', 'P2309', 'P2303', 'P2316', 'P6607', 'P2304'], dtype=object)" ] }, "execution_count": 148, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType['label'].unique()" ] }, { "cell_type": "markdown", "id": "digital-harvard", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 121, "id": "white-badge", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f527d490f81947a59a13375a7339ac44", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/932 [00:00(node2), \" + parentFile + \": (node2)-[]->(nodex), P279star: (nodex)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " --graph-cache ~/sqlite3_caches/valueType_new_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ 
prop +\".incorrect_temp.tsv ;\\\n", " kgtk --debug query -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv \\\n", " ../../wikidata-20210215/derived.\" + parentFile + \".tsv.gz \\\n", " --match 'm: (node1)-[nodeProp]->(node2), \" + parentFile + \": (node2)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " --graph-cache ~/sqlite3_caches/valueType_new_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect_temp.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv ; \\\n", " kgtk --debug cat -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp.tsv \\\n", " ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct_temp2.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv ; \\\n", " ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\")\n", "\n", " cnt += 1\n", " except:\n", " 
print(\"Something failed for prop:\",prop)\n", "\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 122, "id": "qualified-cursor", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "897" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 123, "id": "simplified-cameroon", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,9):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_Final/checkViolations/valueTypeConstraintValidator\"+str(i)+\".sh\")\n", " " ] }, { "cell_type": "markdown", "id": "spectacular-warner", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 170, "id": "valid-defense", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "85e58dd5e214481892dc9e3326a3a2ee", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7ab643bdd19f4e2da4972159d4bf906e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/530 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2302422110[../../allConstraintsAnalysis_Final/valueTypeC...0.000000
P3092462029[../../allConstraintsAnalysis_Final/valueTypeC...0.006238
P3096111502[../../allConstraintsAnalysis_Final/valueTypeC...0.000179
P450175520[../../allConstraintsAnalysis_Final/valueTypeC...0.011268
P31563480[../../allConstraintsAnalysis_Final/valueTypeC...0.000000
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P2302 42211 0 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P3092 4620 29 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P3096 11150 2 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P450 1755 20 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P3156 348 0 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "\n", " violation_ratio \n", "P2302 0.000000 \n", "P3092 0.006238 \n", "P3096 0.000179 \n", "P450 0.011268 \n", "P3156 0.000000 " ] }, "execution_count": 175, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF.head()" ] }, { "cell_type": "code", "execution_count": 176, "id": "neural-trail", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P50080331026[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P610409764[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P7374032[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P2839014[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P3028013[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P3027012[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P538010[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P14307[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P442506[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P619105[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P653305[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P653405[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P717403[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P92202[../../allConstraintsAnalysis_Final/valueTypeC...1.0
P66002[../../allConstraintsAnalysis_Final/valueTypeC...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5008 0 331026 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P6104 0 9764 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P7374 0 32 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P2839 0 14 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P3028 0 13 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P3027 0 12 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P538 0 10 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P143 0 7 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P4425 0 6 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P6191 0 5 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P6533 0 5 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P6534 0 5 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P7174 0 3 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P922 0 2 [../../allConstraintsAnalysis_Final/valueTypeC... \n", "P660 0 2 [../../allConstraintsAnalysis_Final/valueTypeC... 
\n", "\n", " violation_ratio \n", "P5008 1.0 \n", "P6104 1.0 \n", "P7374 1.0 \n", "P2839 1.0 \n", "P3028 1.0 \n", "P3027 1.0 \n", "P538 1.0 \n", "P143 1.0 \n", "P4425 1.0 \n", "P6191 1.0 \n", "P6533 1.0 \n", "P6534 1.0 \n", "P7174 1.0 \n", "P922 1.0 \n", "P660 1.0 " ] }, "execution_count": 176, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF.sort_values(by=['violation_ratio','incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 179, "id": "cutting-polyester", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 897.000000\n", "mean 0.087631\n", "std 0.201504\n", "min 0.000000\n", "25% 0.000867\n", "50% 0.008197\n", "75% 0.053288\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 179, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 180, "id": "alert-receiver", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Value Type Constraint Violation Ratios')" ] }, "execution_count": 180, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "valTypeConstDF['violation_ratio'].plot.hist(bins=100).set_title(\"Value Type Constraint Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 181, "id": "italian-motel", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Value Type Constraint Violation Ratios (<=0.04)')" ] }, "execution_count": 181, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "valTypeConstDF[valTypeConstDF['violation_ratio'] <= 0.04].violation_ratio.plot.hist(bins=100).set_title(\"Value Type Constraint Violation Ratios (<=0.04)\")" ] }, { "cell_type": "code", "execution_count": null, "id": "prescription-ceramic", "metadata": {}, "outputs": [], "source": [ "# Compare against the column's actual mean. Violation ratios lie in [0, 1], so the\n", "# previous hard-coded threshold (3.950680) could never match and always reported 0.\n", "print(f\"No. of constraints whose violation ratio is greater than mean :{sum(valTypeConstDF['violation_ratio'] > valTypeConstDF['violation_ratio'].mean())}/{len(valTypeConstDF)}\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "tutorial-mineral", "metadata": {}, "outputs": [], "source": [ "for key1 in valueTypeConstViolations.keys():\n", " valueTypeConstViolations[key1]['correct'] = valueTypeConstViolations[key1]['instanceOf']['correct'] + valueTypeConstViolations[key1]['subclass']['correct'] + valueTypeConstViolations[key1]['instanceOfOrSubclass']['correct']\n", " valueTypeConstViolations[key1]['incorrect'] = valueTypeConstViolations[key1]['instanceOf']['incorrect'] + valueTypeConstViolations[key1]['subclass']['incorrect'] + valueTypeConstViolations[key1]['instanceOfOrSubclass']['incorrect']\n", " valueTypeConstViolations[key1]['VR'] = valueTypeConstViolations[key1]['incorrect'] / (valueTypeConstViolations[key1]['correct'] + valueTypeConstViolations[key1]['incorrect'])\n", " " ] }, { "cell_type": "code", "execution_count": 16, "id": "satellite-concern", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "{'mandatory': {'instanceOf': {'correct': 11391695, 'incorrect': 5338},\n", " 'subclass': {'correct': 44764, 'incorrect': 1},\n", " 'instanceOfOrSubclass': {'correct': 11638, 'incorrect': 47},\n", " 'propCount': 106,\n", " 'correct': 11448097,\n", " 'incorrect': 5386,\n", " 'VR': 0.0004702499667568372},\n", " 'suggestion': {'instanceOf': {'correct': 
46036, 'incorrect': 486},\n", " 'subclass': {'correct': 118, 'incorrect': 26},\n", " 'instanceOfOrSubclass': {'correct': 0, 'incorrect': 0},\n", " 'propCount': 5,\n", " 'correct': 46154,\n", " 'incorrect': 512,\n", " 'VR': 0.010971585308361549},\n", " 'normal': {'instanceOf': {'correct': 88862554, 'incorrect': 847461},\n", " 'subclass': {'correct': 4373665, 'incorrect': 11677},\n", " 'instanceOfOrSubclass': {'correct': 76234049, 'incorrect': 257925},\n", " 'propCount': 786,\n", " 'correct': 169470268,\n", " 'incorrect': 1117063,\n", " 'VR': 0.0065483350577775325}}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valueTypeConstViolations" ] }, { "cell_type": "code", "execution_count": 17, "id": "fabulous-sudan", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF.to_csv('../../allConstraintsAnalysis_Final/valueTypeConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "traditional-shakespeare", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 127, "id": "spoken-symphony", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5275228e86554cfeb4ac2ffbbf029d30", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/52 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for value type constraint checks\")" ] }, { "cell_type": "markdown", "id": "motivated-sympathy", "metadata": {}, "source": [ "\n", "## Item Requires Statement Constraint\n", "\n", "Here, the constraint lists the other properties that the item (node1) must also have and, in a few cases, the values that those properties must take." ] }, { "cell_type": "markdown", "id": "chubby-glass", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 37, 
"id": "funny-batch", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/itemRequiresConstraint/claims.type-constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 38, "id": "original-expression", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 39, "id": "adequate-symphony", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2306', 'P2305', 'P2316', 'P2304', 'P2303', 'P6607', 'P4155',\n", " 'P31', 'P2916', 'P4680', 'P2308'], dtype=object)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 40, "id": "infrared-canal", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2306 7182\n", "P2305 2540\n", "P2316 2523\n", "P2303 422\n", "P2304 14\n", "P6607 14\n", "P2916 5\n", "P4680 2\n", "P31 1\n", "P4155 1\n", "P2308 1\n", "Name: label, dtype: int64" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 41, "id": "focused-karen", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 42, "id": "private-boundary", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1id
P1006P1006-P2302-Q21503247-0451ef47-0NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
P1010P1010-P2302-Q21503247-56183614-0NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1010-P2302-Q21503247-fd256eaf-0NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1015P1015-P2302-Q21503247-20e3bfc5-0NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1017P1017-P2302-Q21503247-bbac2ce3-0NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 \\\n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN NaN NaN [P214] NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN NaN NaN [P31] NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 NaN NaN [Q794] [P17] NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN NaN NaN [P31] NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN NaN NaN [P214] NaN \n", "\n", "label P2316 P2916 P31 P4155 P4680 \\\n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN NaN NaN NaN NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN NaN NaN NaN NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 [Q21502408] NaN NaN NaN NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN NaN NaN NaN NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN NaN NaN NaN NaN \n", "\n", "label P6607 \n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 43, "id": "conceptual-schedule", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 44, "id": "third-hayes", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P1006NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
P1010NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1010NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1015NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1017NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 P2316 P2916 P31 P4155 P4680 \\\n", "node1 \n", "P1006 NaN NaN NaN [P214] NaN NaN NaN NaN NaN NaN \n", "P1010 NaN NaN NaN [P31] NaN NaN NaN NaN NaN NaN \n", "P1010 NaN NaN [Q794] [P17] NaN [Q21502408] NaN NaN NaN NaN \n", "P1015 NaN NaN NaN [P31] NaN NaN NaN NaN NaN NaN \n", "P1017 NaN NaN NaN [P214] NaN NaN NaN NaN NaN NaN \n", "\n", "label P6607 \n", "node1 \n", "P1006 NaN \n", "P1010 NaN \n", "P1010 NaN \n", "P1015 NaN \n", "P1017 NaN " ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "shaped-companion", "metadata": {}, "source": [ "However, there is one anomaly where the property does not have a co-dependency constraint associated with it, but still has a link to this constraint." ] }, { "cell_type": "markdown", "id": "forced-christmas", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "markdown", "id": "acquired-floor", "metadata": {}, "source": [ "#### Version 1 - Mandatory + Suggestion + Normal" ] }, { "cell_type": "code", "execution_count": 138, "id": "turkish-establishment", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "dd67832afa3d4306831f16413a32f4c7", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory + suggestion + normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: 
../../propertiesSplit_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit_Final/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += 
commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName 
+ \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 139, "id": "peripheral-herald", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "527" ] }, "execution_count": 139, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 140, "id": "incorporated-logistics", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fCnt" ] }, { "cell_type": "code", "execution_count": 141, "id": "optimum-blowing", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,28):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_Final/checkViolations/codepConst_MSN_Validator\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "indoor-verse", "metadata": {}, "source": [ "#### Version 2 - Mandatory + Normal" ] }, { "cell_type": "code", "execution_count": 142, "id": "furnished-paradise", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ca1160d8482c430aa818f75f9db27c87", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 
[00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory + normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit_Final/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o 
../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + 
\"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 143, "id": "searching-individual", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "468" ] }, "execution_count": 143, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 144, "id": "silver-clarity", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,25):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_Final/checkViolations/codepConst_MN_Validator\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "prescription-access", "metadata": {}, 
"source": [ "#### Version 3 - Mandatory" ] }, { "cell_type": "code", "execution_count": 145, "id": "married-porter", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2bd0042741c1429a9faa460854207956", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit_Final/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt 
+= 1\n", " fOP = open(\"../../propertiesSplit_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i 
../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 146, "id": "according-blackberry", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "78" ] }, "execution_count": 146, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 147, "id": 
"extraordinary-drawing", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,5):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_Final/checkViolations/codepConst_M_Validator\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "subsequent-brown", "metadata": {}, "source": [ "#### Version 4 - Normal" ] }, { "cell_type": "code", "execution_count": 148, "id": "operational-migration", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ae6bc474a4b0407e912989ba0886bc0c", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit_Final/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + 
commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " 
\n", " commandOPFile = \"-o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv\\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": 
"code", "execution_count": 149, "id": "harmful-binary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "418" ] }, "execution_count": 149, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 150, "id": "advance-married", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,23):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_Final/checkViolations/codepConst_N_Validator\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "ranging-journal", "metadata": {}, "source": [ "#### Version 5 - Suggestion" ] }, { "cell_type": "code", "execution_count": 151, "id": "missing-jordan", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "39febc19cd2e40ca84236ef34194a392", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = suggestion\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit_Final/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit_Final/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit_Final/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + 
prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 60 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + 
str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit_Final/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> 
../../propertiesSplit_Final/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 152, "id": "soviet-forth", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "97" ] }, "execution_count": 152, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 153, "id": "racial-stationery", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,6):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_Final/checkViolations/codepConst_S_Validator\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "homeless-pleasure", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 47, "id": "welcome-dependence", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "471428b6d5a3416a8dd76292d0b8b075", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b51f16f69bb8487d9afc530bbc281ad5", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1192 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P2322120765[../../allConstraintsAnalysis_Final/codependen...
P2643170860[../../allConstraintsAnalysis_Final/codependen...
P1191760182477[../../allConstraintsAnalysis_Final/codependen...
P1629753188[../../allConstraintsAnalysis_Final/codependen...
P3610351964[../../allConstraintsAnalysis_Final/codependen...
............
P19263711424855[../../allConstraintsAnalysis_Final/codependen...
P28736428[../../allConstraintsAnalysis_Final/codependen...
P2875354716[../../allConstraintsAnalysis_Final/codependen...
P28765114[../../allConstraintsAnalysis_Final/codependen...
P290021844813[../../allConstraintsAnalysis_Final/codependen...
\n", "

527 rows × 3 columns

\n", "" ], "text/plain": [ " correct incorrect paths\n", "P2322 1207 65 [../../allConstraintsAnalysis_Final/codependen...\n", "P2643 17086 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P119 176018 2477 [../../allConstraintsAnalysis_Final/codependen...\n", "P1629 7531 88 [../../allConstraintsAnalysis_Final/codependen...\n", "P3610 3519 64 [../../allConstraintsAnalysis_Final/codependen...\n", "... ... ... ...\n", "P19 2637114 24855 [../../allConstraintsAnalysis_Final/codependen...\n", "P2873 642 8 [../../allConstraintsAnalysis_Final/codependen...\n", "P2875 3547 16 [../../allConstraintsAnalysis_Final/codependen...\n", "P2876 51 14 [../../allConstraintsAnalysis_Final/codependen...\n", "P2900 21844 813 [../../allConstraintsAnalysis_Final/codependen...\n", "\n", "[527 rows x 3 columns]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1" ] }, { "cell_type": "code", "execution_count": 55, "id": "powered-residence", "metadata": {}, "outputs": [], "source": [ "codepConstDF1['violation_ratio'] = codepConstDF1.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)" ] }, { "cell_type": "code", "execution_count": 56, "id": "chinese-pressing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P1111046327[../../allConstraintsAnalysis_Final/codependen...1.0
P2302042211[../../allConstraintsAnalysis_Final/codependen...1.0
P30630549[../../allConstraintsAnalysis_Final/codependen...1.0
P2303039[../../allConstraintsAnalysis_Final/codependen...1.0
P5447023[../../allConstraintsAnalysis_Final/codependen...1.0
P5448023[../../allConstraintsAnalysis_Final/codependen...1.0
P2308017[../../allConstraintsAnalysis_Final/codependen...1.0
P756908[../../allConstraintsAnalysis_Final/codependen...1.0
P790307[../../allConstraintsAnalysis_Final/codependen...1.0
P57404[../../allConstraintsAnalysis_Final/codependen...1.0
P230603[../../allConstraintsAnalysis_Final/codependen...1.0
P291601[../../allConstraintsAnalysis_Final/codependen...1.0
P826401[../../allConstraintsAnalysis_Final/codependen...1.0
P243301[../../allConstraintsAnalysis_Final/codependen...1.0
P230701[../../allConstraintsAnalysis_Final/codependen...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1111 0 46327 [../../allConstraintsAnalysis_Final/codependen... \n", "P2302 0 42211 [../../allConstraintsAnalysis_Final/codependen... \n", "P3063 0 549 [../../allConstraintsAnalysis_Final/codependen... \n", "P2303 0 39 [../../allConstraintsAnalysis_Final/codependen... \n", "P5447 0 23 [../../allConstraintsAnalysis_Final/codependen... \n", "P5448 0 23 [../../allConstraintsAnalysis_Final/codependen... \n", "P2308 0 17 [../../allConstraintsAnalysis_Final/codependen... \n", "P7569 0 8 [../../allConstraintsAnalysis_Final/codependen... \n", "P7903 0 7 [../../allConstraintsAnalysis_Final/codependen... \n", "P574 0 4 [../../allConstraintsAnalysis_Final/codependen... \n", "P2306 0 3 [../../allConstraintsAnalysis_Final/codependen... \n", "P2916 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P8264 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P2433 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P2307 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "\n", " violation_ratio \n", "P1111 1.0 \n", "P2302 1.0 \n", "P3063 1.0 \n", "P2303 1.0 \n", "P5447 1.0 \n", "P5448 1.0 \n", "P2308 1.0 \n", "P7569 1.0 \n", "P7903 1.0 \n", "P574 1.0 \n", "P2306 1.0 \n", "P2916 1.0 \n", "P8264 1.0 \n", "P2433 1.0 \n", "P2307 1.0 " ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1.sort_values(by=['violation_ratio', 'incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 59, "id": "demonstrated-debut", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P22142962988711699[../../allConstraintsAnalysis_Final/codependen...0.193676
P7342001246704728[../../allConstraintsAnalysis_Final/codependen...0.260434
P43331028893435543[../../allConstraintsAnalysis_Final/codependen...0.013842
P1951132062384602[../../allConstraintsAnalysis_Final/codependen...0.253584
P5694646728241105[../../allConstraintsAnalysis_Final/codependen...0.049328
P13110056935198870[../../allConstraintsAnalysis_Final/codependen...0.019391
P2755955123578[../../allConstraintsAnalysis_Final/codependen...0.954027
P2860174402886114713[../../allConstraintsAnalysis_Final/codependen...0.000657
P201784792101300[../../allConstraintsAnalysis_Final/codependen...0.544354
P570233261194455[../../allConstraintsAnalysis_Final/codependen...0.038917
P1435189387479479[../../allConstraintsAnalysis_Final/codependen...0.040276
P19223045163926[../../allConstraintsAnalysis_Final/codependen...0.677347
P7084525354258[../../allConstraintsAnalysis_Final/codependen...0.545246
P19711752249904[../../allConstraintsAnalysis_Final/codependen...0.298066
P15983654446915[../../allConstraintsAnalysis_Final/codependen...0.562132
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2214 2962988 711699 [../../allConstraintsAnalysis_Final/codependen... \n", "P734 2001246 704728 [../../allConstraintsAnalysis_Final/codependen... \n", "P433 31028893 435543 [../../allConstraintsAnalysis_Final/codependen... \n", "P195 1132062 384602 [../../allConstraintsAnalysis_Final/codependen... \n", "P569 4646728 241105 [../../allConstraintsAnalysis_Final/codependen... \n", "P131 10056935 198870 [../../allConstraintsAnalysis_Final/codependen... \n", "P275 5955 123578 [../../allConstraintsAnalysis_Final/codependen... \n", "P2860 174402886 114713 [../../allConstraintsAnalysis_Final/codependen... \n", "P2017 84792 101300 [../../allConstraintsAnalysis_Final/codependen... \n", "P570 2332611 94455 [../../allConstraintsAnalysis_Final/codependen... \n", "P1435 1893874 79479 [../../allConstraintsAnalysis_Final/codependen... \n", "P1922 30451 63926 [../../allConstraintsAnalysis_Final/codependen... \n", "P708 45253 54258 [../../allConstraintsAnalysis_Final/codependen... \n", "P197 117522 49904 [../../allConstraintsAnalysis_Final/codependen... \n", "P1598 36544 46915 [../../allConstraintsAnalysis_Final/codependen... 
\n", "\n", " violation_ratio \n", "P2214 0.193676 \n", "P734 0.260434 \n", "P433 0.013842 \n", "P195 0.253584 \n", "P569 0.049328 \n", "P131 0.019391 \n", "P275 0.954027 \n", "P2860 0.000657 \n", "P2017 0.544354 \n", "P570 0.038917 \n", "P1435 0.040276 \n", "P1922 0.677347 \n", "P708 0.545246 \n", "P197 0.298066 \n", "P1598 0.562132 " ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 60, "id": "developed-zimbabwe", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 527.000000\n", "mean 0.209456\n", "std 0.306132\n", "min 0.000000\n", "25% 0.001024\n", "50% 0.037037\n", "75% 0.300012\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 61, "id": "unknown-johnston", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 1 - Violation Ratios')" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF1['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 1 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 62, "id": "exceptional-dakota", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 1 - Violation Ratios <= 0.5')" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF1[codepConstDF1['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 1 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": 63, "id": "interior-joseph", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of properties whose violation ratio is greater than mean: 0/527\n" ] } ], "source": [ "print(f\"No. of properties whose violation ratio is greater than mean: {sum(codepConstDF1['violation_ratio'] >= 3.539484)}/{len(codepConstDF1)}\")" ] }, { "cell_type": "code", "execution_count": 64, "id": "english-difference", "metadata": {}, "outputs": [], "source": [ "codepConstDF1.to_csv('../../allConstraintsAnalysis_Final/codepConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "greater-genetics", "metadata": {}, "source": [ "#### Version 2 - Mand Normal" ] }, { "cell_type": "code", "execution_count": 65, "id": "constant-chance", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF2 = pd.DataFrame(codepConstViolations['Mand_Normal']).T" ] }, { "cell_type": "code", "execution_count": 66, "id": "included-adjustment", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P1196850373039[../../allConstraintsAnalysis_Final/codependen...
P24292228236[../../allConstraintsAnalysis_Final/codependen...
P184357570914889[../../allConstraintsAnalysis_Final/codependen...
P26681680[../../allConstraintsAnalysis_Final/codependen...
P13833172641[../../allConstraintsAnalysis_Final/codependen...
............
P3816177[../../allConstraintsAnalysis_Final/codependen...
P382373760[../../allConstraintsAnalysis_Final/codependen...
P383420700[../../allConstraintsAnalysis_Final/codependen...
P38429338338[../../allConstraintsAnalysis_Final/codependen...
P38583336106[../../allConstraintsAnalysis_Final/codependen...
\n", "

468 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P1196 85037 3039 [../../allConstraintsAnalysis_Final/codependen...\n", "P2429 2228 236 [../../allConstraintsAnalysis_Final/codependen...\n", "P1843 575709 14889 [../../allConstraintsAnalysis_Final/codependen...\n", "P2668 168 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P1383 31726 41 [../../allConstraintsAnalysis_Final/codependen...\n", "... ... ... ...\n", "P3816 17 7 [../../allConstraintsAnalysis_Final/codependen...\n", "P3823 7376 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P3834 2070 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P3842 9338 338 [../../allConstraintsAnalysis_Final/codependen...\n", "P3858 3336 106 [../../allConstraintsAnalysis_Final/codependen...\n", "\n", "[468 rows x 3 columns]" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2" ] }, { "cell_type": "code", "execution_count": 67, "id": "fundamental-knowing", "metadata": {}, "outputs": [], "source": [ "codepConstDF2['violation_ratio'] = codepConstDF2.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)" ] }, { "cell_type": "code", "execution_count": 68, "id": "harmful-discipline", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P790307[../../allConstraintsAnalysis_Final/codependen...1.0
P2308017[../../allConstraintsAnalysis_Final/codependen...1.0
P291601[../../allConstraintsAnalysis_Final/codependen...1.0
P243301[../../allConstraintsAnalysis_Final/codependen...1.0
P826401[../../allConstraintsAnalysis_Final/codependen...1.0
P230901[../../allConstraintsAnalysis_Final/codependen...1.0
P5447023[../../allConstraintsAnalysis_Final/codependen...1.0
P5448023[../../allConstraintsAnalysis_Final/codependen...1.0
P230701[../../allConstraintsAnalysis_Final/codependen...1.0
P1111046327[../../allConstraintsAnalysis_Final/codependen...1.0
P756908[../../allConstraintsAnalysis_Final/codependen...1.0
P57404[../../allConstraintsAnalysis_Final/codependen...1.0
P230603[../../allConstraintsAnalysis_Final/codependen...1.0
P231301[../../allConstraintsAnalysis_Final/codependen...1.0
P2303039[../../allConstraintsAnalysis_Final/codependen...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P7903 0 7 [../../allConstraintsAnalysis_Final/codependen... \n", "P2308 0 17 [../../allConstraintsAnalysis_Final/codependen... \n", "P2916 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P2433 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P8264 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P2309 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P5447 0 23 [../../allConstraintsAnalysis_Final/codependen... \n", "P5448 0 23 [../../allConstraintsAnalysis_Final/codependen... \n", "P2307 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P1111 0 46327 [../../allConstraintsAnalysis_Final/codependen... \n", "P7569 0 8 [../../allConstraintsAnalysis_Final/codependen... \n", "P574 0 4 [../../allConstraintsAnalysis_Final/codependen... \n", "P2306 0 3 [../../allConstraintsAnalysis_Final/codependen... \n", "P2313 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P2303 0 39 [../../allConstraintsAnalysis_Final/codependen... \n", "\n", " violation_ratio \n", "P7903 1.0 \n", "P2308 1.0 \n", "P2916 1.0 \n", "P2433 1.0 \n", "P8264 1.0 \n", "P2309 1.0 \n", "P5447 1.0 \n", "P5448 1.0 \n", "P2307 1.0 \n", "P1111 1.0 \n", "P7569 1.0 \n", "P574 1.0 \n", "P2306 1.0 \n", "P2313 1.0 \n", "P2303 1.0 " ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 69, "id": "unlikely-chamber", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P22142962988711699[../../allConstraintsAnalysis_Final/codependen...0.193676
P43331028893435543[../../allConstraintsAnalysis_Final/codependen...0.013842
P2755955123578[../../allConstraintsAnalysis_Final/codependen...0.954027
P2860174402886114713[../../allConstraintsAnalysis_Final/codependen...0.000657
P1435189387479479[../../allConstraintsAnalysis_Final/codependen...0.040276
P7084525354258[../../allConstraintsAnalysis_Final/codependen...0.545246
P19711752249904[../../allConstraintsAnalysis_Final/codependen...0.298066
P15983697846481[../../allConstraintsAnalysis_Final/codependen...0.556932
P1111046327[../../allConstraintsAnalysis_Final/codependen...1.000000
P2248402041566[../../allConstraintsAnalysis_Final/codependen...0.911815
P2325407140611[../../allConstraintsAnalysis_Final/codependen...0.908889
P856123929238107[../../allConstraintsAnalysis_Final/codependen...0.029832
P2243402536540[../../allConstraintsAnalysis_Final/codependen...0.900777
P2244402736527[../../allConstraintsAnalysis_Final/codependen...0.900700
P41335779333607[../../allConstraintsAnalysis_Final/codependen...0.085864
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2214 2962988 711699 [../../allConstraintsAnalysis_Final/codependen... \n", "P433 31028893 435543 [../../allConstraintsAnalysis_Final/codependen... \n", "P275 5955 123578 [../../allConstraintsAnalysis_Final/codependen... \n", "P2860 174402886 114713 [../../allConstraintsAnalysis_Final/codependen... \n", "P1435 1893874 79479 [../../allConstraintsAnalysis_Final/codependen... \n", "P708 45253 54258 [../../allConstraintsAnalysis_Final/codependen... \n", "P197 117522 49904 [../../allConstraintsAnalysis_Final/codependen... \n", "P1598 36978 46481 [../../allConstraintsAnalysis_Final/codependen... \n", "P1111 0 46327 [../../allConstraintsAnalysis_Final/codependen... \n", "P2248 4020 41566 [../../allConstraintsAnalysis_Final/codependen... \n", "P2325 4071 40611 [../../allConstraintsAnalysis_Final/codependen... \n", "P856 1239292 38107 [../../allConstraintsAnalysis_Final/codependen... \n", "P2243 4025 36540 [../../allConstraintsAnalysis_Final/codependen... \n", "P2244 4027 36527 [../../allConstraintsAnalysis_Final/codependen... \n", "P413 357793 33607 [../../allConstraintsAnalysis_Final/codependen... 
\n", "\n", " violation_ratio \n", "P2214 0.193676 \n", "P433 0.013842 \n", "P275 0.954027 \n", "P2860 0.000657 \n", "P1435 0.040276 \n", "P708 0.545246 \n", "P197 0.298066 \n", "P1598 0.556932 \n", "P1111 1.000000 \n", "P2248 0.911815 \n", "P2325 0.908889 \n", "P856 0.029832 \n", "P2243 0.900777 \n", "P2244 0.900700 \n", "P413 0.085864 " ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 70, "id": "violent-match", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 468.000000\n", "mean 0.171527\n", "std 0.285675\n", "min 0.000000\n", "25% 0.000690\n", "50% 0.017889\n", "75% 0.208292\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 71, "id": "educational-thickness", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 2 - Violation Ratios')" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF2['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 2 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 72, "id": "latin-mitchell", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 2 - Violation Ratios <= 0.5')" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF2[codepConstDF2['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 2 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": 73, "id": "asian-forwarding", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of properties whose violation ratio is greater than mean: 0/468\n" ] } ], "source": [ "print(f\"No. of properties whose violation ratio is greater than mean: {sum(codepConstDF2['violation_ratio'] >= 2.290915)}/{len(codepConstDF2)}\")" ] }, { "cell_type": "markdown", "id": "destroyed-flash", "metadata": {}, "source": [ "#### Version 3 - Mand" ] }, { "cell_type": "code", "execution_count": 74, "id": "consecutive-plenty", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF3 = pd.DataFrame(codepConstViolations['Mand']).T" ] }, { "cell_type": "code", "execution_count": 75, "id": "digital-mileage", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P202123031[../../allConstraintsAnalysis_Final/codependen...
P37443110[../../allConstraintsAnalysis_Final/codependen...
P598213260[../../allConstraintsAnalysis_Final/codependen...
P1081123000[../../allConstraintsAnalysis_Final/codependen...
P2095383[../../allConstraintsAnalysis_Final/codependen...
............
P5172400[../../allConstraintsAnalysis_Final/codependen...
P52117171[../../allConstraintsAnalysis_Final/codependen...
P55538761[../../allConstraintsAnalysis_Final/codependen...
P562314720[../../allConstraintsAnalysis_Final/codependen...
P56438992[../../allConstraintsAnalysis_Final/codependen...
\n", "

78 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P2021 2303 1 [../../allConstraintsAnalysis_Final/codependen...\n", "P3744 311 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P5982 1326 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P1081 12300 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P2095 38 3 [../../allConstraintsAnalysis_Final/codependen...\n", "... ... ... ...\n", "P517 240 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P521 1717 1 [../../allConstraintsAnalysis_Final/codependen...\n", "P555 3876 1 [../../allConstraintsAnalysis_Final/codependen...\n", "P5623 1472 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P564 3899 2 [../../allConstraintsAnalysis_Final/codependen...\n", "\n", "[78 rows x 3 columns]" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3" ] }, { "cell_type": "code", "execution_count": 76, "id": "formed-battle", "metadata": {}, "outputs": [], "source": [ "codepConstDF3['violation_ratio'] = codepConstDF3.apply(lambda p: p.incorrect / p.correct if p.correct != 0 else p.incorrect/100, axis=1)" ] }, { "cell_type": "code", "execution_count": 77, "id": "numerous-construction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P5051163[../../allConstraintsAnalysis_Final/codependen...63.000000
P434171[../../allConstraintsAnalysis_Final/codependen...0.142857
P2095383[../../allConstraintsAnalysis_Final/codependen...0.078947
P99096641[../../allConstraintsAnalysis_Final/codependen...0.042443
P3931225064[../../allConstraintsAnalysis_Final/codependen...0.028444
P17313639[../../allConstraintsAnalysis_Final/codependen...0.024793
P2009101915[../../allConstraintsAnalysis_Final/codependen...0.014720
P2461692[../../allConstraintsAnalysis_Final/codependen...0.011834
P826401[../../allConstraintsAnalysis_Final/codependen...0.010000
P16358907[../../allConstraintsAnalysis_Final/codependen...0.007865
P9445774[../../allConstraintsAnalysis_Final/codependen...0.006932
P1560323520[../../allConstraintsAnalysis_Final/codependen...0.006182
P26798875[../../allConstraintsAnalysis_Final/codependen...0.005637
P4511050236[../../allConstraintsAnalysis_Final/codependen...0.003428
P236516505[../../allConstraintsAnalysis_Final/codependen...0.003030
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5051 1 63 [../../allConstraintsAnalysis_Final/codependen... \n", "P4341 7 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P2095 38 3 [../../allConstraintsAnalysis_Final/codependen... \n", "P990 966 41 [../../allConstraintsAnalysis_Final/codependen... \n", "P3931 2250 64 [../../allConstraintsAnalysis_Final/codependen... \n", "P1731 363 9 [../../allConstraintsAnalysis_Final/codependen... \n", "P2009 1019 15 [../../allConstraintsAnalysis_Final/codependen... \n", "P246 169 2 [../../allConstraintsAnalysis_Final/codependen... \n", "P8264 0 1 [../../allConstraintsAnalysis_Final/codependen... \n", "P1635 890 7 [../../allConstraintsAnalysis_Final/codependen... \n", "P944 577 4 [../../allConstraintsAnalysis_Final/codependen... \n", "P1560 3235 20 [../../allConstraintsAnalysis_Final/codependen... \n", "P2679 887 5 [../../allConstraintsAnalysis_Final/codependen... \n", "P451 10502 36 [../../allConstraintsAnalysis_Final/codependen... \n", "P2365 1650 5 [../../allConstraintsAnalysis_Final/codependen... \n", "\n", " violation_ratio \n", "P5051 63.000000 \n", "P4341 0.142857 \n", "P2095 0.078947 \n", "P990 0.042443 \n", "P3931 0.028444 \n", "P1731 0.024793 \n", "P2009 0.014720 \n", "P246 0.011834 \n", "P8264 0.010000 \n", "P1635 0.007865 \n", "P944 0.006932 \n", "P1560 0.006182 \n", "P2679 0.005637 \n", "P451 0.003428 \n", "P2365 0.003030 " ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 81, "id": "imposed-bibliography", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P7959655743376[../../allConstraintsAnalysis_Final/codependen...0.000573
P3931225064[../../allConstraintsAnalysis_Final/codependen...0.028444
P5051163[../../allConstraintsAnalysis_Final/codependen...63.000000
P9152836445[../../allConstraintsAnalysis_Final/codependen...0.001587
P99096641[../../allConstraintsAnalysis_Final/codependen...0.042443
P4511050236[../../allConstraintsAnalysis_Final/codependen...0.003428
P19716739234[../../allConstraintsAnalysis_Final/codependen...0.000203
P1560323520[../../allConstraintsAnalysis_Final/codependen...0.006182
P2009101915[../../allConstraintsAnalysis_Final/codependen...0.014720
P17313639[../../allConstraintsAnalysis_Final/codependen...0.024793
P16358907[../../allConstraintsAnalysis_Final/codependen...0.007865
P1196880706[../../allConstraintsAnalysis_Final/codependen...0.000068
P26798875[../../allConstraintsAnalysis_Final/codependen...0.005637
P236516505[../../allConstraintsAnalysis_Final/codependen...0.003030
P1411217094[../../allConstraintsAnalysis_Final/codependen...0.000033
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P7959 655743 376 [../../allConstraintsAnalysis_Final/codependen... \n", "P3931 2250 64 [../../allConstraintsAnalysis_Final/codependen... \n", "P5051 1 63 [../../allConstraintsAnalysis_Final/codependen... \n", "P915 28364 45 [../../allConstraintsAnalysis_Final/codependen... \n", "P990 966 41 [../../allConstraintsAnalysis_Final/codependen... \n", "P451 10502 36 [../../allConstraintsAnalysis_Final/codependen... \n", "P197 167392 34 [../../allConstraintsAnalysis_Final/codependen... \n", "P1560 3235 20 [../../allConstraintsAnalysis_Final/codependen... \n", "P2009 1019 15 [../../allConstraintsAnalysis_Final/codependen... \n", "P1731 363 9 [../../allConstraintsAnalysis_Final/codependen... \n", "P1635 890 7 [../../allConstraintsAnalysis_Final/codependen... \n", "P1196 88070 6 [../../allConstraintsAnalysis_Final/codependen... \n", "P2679 887 5 [../../allConstraintsAnalysis_Final/codependen... \n", "P2365 1650 5 [../../allConstraintsAnalysis_Final/codependen... \n", "P141 121709 4 [../../allConstraintsAnalysis_Final/codependen... 
\n", "\n", " violation_ratio \n", "P7959 0.000573 \n", "P3931 0.028444 \n", "P5051 63.000000 \n", "P915 0.001587 \n", "P990 0.042443 \n", "P451 0.003428 \n", "P197 0.000203 \n", "P1560 0.006182 \n", "P2009 0.014720 \n", "P1731 0.024793 \n", "P1635 0.007865 \n", "P1196 0.000068 \n", "P2679 0.005637 \n", "P2365 0.003030 \n", "P141 0.000033 " ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 82, "id": "emotional-crown", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 78.000000\n", "mean 0.812773\n", "std 7.132789\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.000000\n", "75% 0.000641\n", "max 63.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 83, "id": "certain-freeze", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 3 - Violation Ratios')" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF3['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 3 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 84, "id": "cooperative-ownership", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 3 - Violation Ratios <= 0.0005')" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF3[codepConstDF3['violation_ratio'] <= 0.0005].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 3 - Violation Ratios <= 0.0005\")" ] }, { "cell_type": "code", "execution_count": 85, "id": "studied-inclusion", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of properties whose violation ratio is greater than mean: 1/78\n" ] } ], "source": [ "# Compare against the mean computed from the data; a hard-coded threshold goes stale whenever the ratios change.\n", "print(f\"No. of properties whose violation ratio is greater than mean: {(codepConstDF3['violation_ratio'] > codepConstDF3['violation_ratio'].mean()).sum()}/{len(codepConstDF3)}\")" ] }, { "cell_type": "markdown", "id": "protective-brazil", "metadata": {}, "source": [ "#### Version 4 - Normal" ] }, { "cell_type": "code", "execution_count": 86, "id": "laughing-pressing", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF4 = pd.DataFrame(codepConstViolations['Normal']).T" ] }, { "cell_type": "code", "execution_count": 87, "id": "loving-swift", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P3880242457460[../../allConstraintsAnalysis_Final/codependen...
P70802120[../../allConstraintsAnalysis_Final/codependen...
P1540236379456[../../allConstraintsAnalysis_Final/codependen...
P434180[../../allConstraintsAnalysis_Final/codependen...
P50128110[../../allConstraintsAnalysis_Final/codependen...
............
P42134254213[../../allConstraintsAnalysis_Final/codependen...
P423825811[../../allConstraintsAnalysis_Final/codependen...
P4272723226[../../allConstraintsAnalysis_Final/codependen...
P431618372[../../allConstraintsAnalysis_Final/codependen...
P43331028893435543[../../allConstraintsAnalysis_Final/codependen...
\n", "

418 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P3880 242457 460 [../../allConstraintsAnalysis_Final/codependen...\n", "P7080 212 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P1540 236379 456 [../../allConstraintsAnalysis_Final/codependen...\n", "P4341 8 0 [../../allConstraintsAnalysis_Final/codependen...\n", "P5012 81 10 [../../allConstraintsAnalysis_Final/codependen...\n", "... ... ... ...\n", "P4213 42542 13 [../../allConstraintsAnalysis_Final/codependen...\n", "P4238 258 11 [../../allConstraintsAnalysis_Final/codependen...\n", "P427 27232 26 [../../allConstraintsAnalysis_Final/codependen...\n", "P4316 183 72 [../../allConstraintsAnalysis_Final/codependen...\n", "P433 31028893 435543 [../../allConstraintsAnalysis_Final/codependen...\n", "\n", "[418 rows x 3 columns]" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4" ] }, { "cell_type": "code", "execution_count": 88, "id": "north-christian", "metadata": {}, "outputs": [], "source": [ "codepConstDF4['violation_ratio'] = codepConstDF4.apply(lambda p: p.incorrect / p.correct if p.correct != 0 else p.incorrect/100, axis=1)" ] }, { "cell_type": "code", "execution_count": 89, "id": "closing-causing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P1111046327[../../allConstraintsAnalysis_Final/codependen...463.270000
P1995809985[../../allConstraintsAnalysis_Final/codependen...124.812500
P76813315143[../../allConstraintsAnalysis_Final/codependen...113.857143
P450141922682[../../allConstraintsAnalysis_Final/codependen...54.133652
P27154158[../../allConstraintsAnalysis_Final/codependen...39.500000
P2755955123578[../../allConstraintsAnalysis_Final/codependen...20.751973
P2376119[../../allConstraintsAnalysis_Final/codependen...19.000000
P39122973705[../../allConstraintsAnalysis_Final/codependen...12.474747
P272012134[../../allConstraintsAnalysis_Final/codependen...11.166667
P2248402041566[../../allConstraintsAnalysis_Final/codependen...10.339801
P2325407140611[../../allConstraintsAnalysis_Final/codependen...9.975682
P2243402536540[../../allConstraintsAnalysis_Final/codependen...9.078261
P2244402736527[../../allConstraintsAnalysis_Final/codependen...9.070524
P34485474302[../../allConstraintsAnalysis_Final/codependen...7.864717
P770765445[../../allConstraintsAnalysis_Final/codependen...6.846154
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1111 0 46327 [../../allConstraintsAnalysis_Final/codependen... \n", "P1995 80 9985 [../../allConstraintsAnalysis_Final/codependen... \n", "P768 133 15143 [../../allConstraintsAnalysis_Final/codependen... \n", "P4501 419 22682 [../../allConstraintsAnalysis_Final/codependen... \n", "P2715 4 158 [../../allConstraintsAnalysis_Final/codependen... \n", "P275 5955 123578 [../../allConstraintsAnalysis_Final/codependen... \n", "P2376 1 19 [../../allConstraintsAnalysis_Final/codependen... \n", "P3912 297 3705 [../../allConstraintsAnalysis_Final/codependen... \n", "P2720 12 134 [../../allConstraintsAnalysis_Final/codependen... \n", "P2248 4020 41566 [../../allConstraintsAnalysis_Final/codependen... \n", "P2325 4071 40611 [../../allConstraintsAnalysis_Final/codependen... \n", "P2243 4025 36540 [../../allConstraintsAnalysis_Final/codependen... \n", "P2244 4027 36527 [../../allConstraintsAnalysis_Final/codependen... \n", "P3448 547 4302 [../../allConstraintsAnalysis_Final/codependen... \n", "P7707 65 445 [../../allConstraintsAnalysis_Final/codependen... \n", "\n", " violation_ratio \n", "P1111 463.270000 \n", "P1995 124.812500 \n", "P768 113.857143 \n", "P4501 54.133652 \n", "P2715 39.500000 \n", "P275 20.751973 \n", "P2376 19.000000 \n", "P3912 12.474747 \n", "P2720 11.166667 \n", "P2248 10.339801 \n", "P2325 9.975682 \n", "P2243 9.078261 \n", "P2244 9.070524 \n", "P3448 7.864717 \n", "P7707 6.846154 " ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 91, "id": "brief-effect", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P22142962988711699[../../allConstraintsAnalysis_Final/codependen...0.240196
P43331028893435543[../../allConstraintsAnalysis_Final/codependen...0.014037
P2755955123578[../../allConstraintsAnalysis_Final/codependen...20.751973
P2860174402886114713[../../allConstraintsAnalysis_Final/codependen...0.000658
P1435189387479479[../../allConstraintsAnalysis_Final/codependen...0.041966
P7084525354258[../../allConstraintsAnalysis_Final/codependen...1.198992
P19711752349903[../../allConstraintsAnalysis_Final/codependen...0.424623
P15983697846481[../../allConstraintsAnalysis_Final/codependen...1.256991
P1111046327[../../allConstraintsAnalysis_Final/codependen...463.270000
P2248402041566[../../allConstraintsAnalysis_Final/codependen...10.339801
P2325407140611[../../allConstraintsAnalysis_Final/codependen...9.975682
P856123929238107[../../allConstraintsAnalysis_Final/codependen...0.030749
P2243402536540[../../allConstraintsAnalysis_Final/codependen...9.078261
P2244402736527[../../allConstraintsAnalysis_Final/codependen...9.070524
P41335779333607[../../allConstraintsAnalysis_Final/codependen...0.093929
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2214 2962988 711699 [../../allConstraintsAnalysis_Final/codependen... \n", "P433 31028893 435543 [../../allConstraintsAnalysis_Final/codependen... \n", "P275 5955 123578 [../../allConstraintsAnalysis_Final/codependen... \n", "P2860 174402886 114713 [../../allConstraintsAnalysis_Final/codependen... \n", "P1435 1893874 79479 [../../allConstraintsAnalysis_Final/codependen... \n", "P708 45253 54258 [../../allConstraintsAnalysis_Final/codependen... \n", "P197 117523 49903 [../../allConstraintsAnalysis_Final/codependen... \n", "P1598 36978 46481 [../../allConstraintsAnalysis_Final/codependen... \n", "P1111 0 46327 [../../allConstraintsAnalysis_Final/codependen... \n", "P2248 4020 41566 [../../allConstraintsAnalysis_Final/codependen... \n", "P2325 4071 40611 [../../allConstraintsAnalysis_Final/codependen... \n", "P856 1239292 38107 [../../allConstraintsAnalysis_Final/codependen... \n", "P2243 4025 36540 [../../allConstraintsAnalysis_Final/codependen... \n", "P2244 4027 36527 [../../allConstraintsAnalysis_Final/codependen... \n", "P413 357793 33607 [../../allConstraintsAnalysis_Final/codependen... 
\n", "\n", " violation_ratio \n", "P2214 0.240196 \n", "P433 0.014037 \n", "P275 20.751973 \n", "P2860 0.000658 \n", "P1435 0.041966 \n", "P708 1.198992 \n", "P197 0.424623 \n", "P1598 1.256991 \n", "P1111 463.270000 \n", "P2248 10.339801 \n", "P2325 9.975682 \n", "P856 0.030749 \n", "P2243 9.078261 \n", "P2244 9.070524 \n", "P413 0.093929 " ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 92, "id": "wireless-passenger", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 418.000000\n", "mean 2.454376\n", "std 24.333809\n", "min 0.000000\n", "25% 0.001419\n", "50% 0.026024\n", "75% 0.237647\n", "max 463.270000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 93, "id": "civilian-arnold", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 4 - Violation Ratios')" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF4['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 4 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 94, "id": "threaded-cooler", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 4 - Violation Ratios <= 0.5')" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF4[codepConstDF4['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 4 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": null, "id": "olympic-charlotte", "metadata": {}, "outputs": [], "source": [ "# Compare against the mean computed from the data; a hard-coded threshold goes stale whenever the ratios change.\n", "print(f\"No. of properties whose violation ratio is greater than mean: {(codepConstDF4['violation_ratio'] > codepConstDF4['violation_ratio'].mean()).sum()}/{len(codepConstDF4)}\")" ] }, { "cell_type": "markdown", "id": "published-affiliate", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 2, "id": "aggregate-conservative", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "544889064f324f8ca261b134221aad63", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/52 [00:00(node2)` and `(node2)-[prop]->(node1)` must be present with few exceptions" ] }, { "cell_type": "markdown", "id": "silent-fundamentals", "metadata": {}, "source": [ "### Constraints File" ] }, { "cell_type": "code", "execution_count": 154, "id": "known-wednesday", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-15 03:24:06 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " AND graph_1_c1.\"node2\"=?\r\n", " PARAS: ['P2302', 'Q21510862']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->(:Q21510862)\" 
\\\n", " -o ../../constraintsOP/symmetricConstraint/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 155, "id": "legal-diamond", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "qualiDF = pd.read_csv(\"../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz\",sep='\\t')\n", "constDF = pd.read_csv(\"../../constraintsOP/symmetricConstraint/claims.constraints_list.tsv\",sep='\\t')" ] }, { "cell_type": "code", "execution_count": 156, "id": "exceptional-morris", "metadata": {}, "outputs": [], "source": [ "constDF2 = constDF.set_index('id').join(qualiDF.set_index('node1'),rsuffix='_qualifier').drop(columns=['id', 'node2;wikidatatype_qualifier', 'rank', 'node2', 'label', 'node2;wikidatatype']).rename(columns={'label_qualifier':'label', 'node2_qualifier': 'node2'})\n", "constDF2 = constDF2.reset_index()\n", "constDF2 = constDF2.rename(columns={'index':'id'})\n", "constDF2['label'] = constDF2.label.fillna(\"P2316\")\n", "constDF2['node2'] = constDF2.node2.fillna(\"Normal\")" ] }, { "cell_type": "code", "execution_count": 157, "id": "burning-involvement", "metadata": {}, "outputs": [], "source": [ "constDF2.to_csv(\"../../constraintsOP/symmetricConstraint/claims.constraints_all.tsv\",sep=\"\\t\",index=False)" ] }, { "cell_type": "code", "execution_count": 158, "id": "naval-identification", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/symmetricConstraint/claims.constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 159, "id": "considered-madison", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 160, "id": "alone-cattle", "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"array(['P2316', 'P2303'], dtype=object)" ] }, "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 161, "id": "mighty-ordinary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2316 42\n", "P2303 3\n", "Name: label, dtype: int64" ] }, "execution_count": 161, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 162, "id": "sensitive-alliance", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 163, "id": "tender-valley", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2316
node1id
P1322P1322-P2302-Q21510862-85dea891-0NaN[Normal]
P1327P1327-P2302-Q21510862-a3c3a094-0NaN[Normal]
P1382P1382-P2302-Q21510862-f6bcfecf-0NaN[Normal]
P1560P1560-P2302-Q21510862-fabecaeb-0NaN[Q21502408]
P1639P1639-P2302-Q21510862-384edcd4-0NaN[Q21502408]
\n", "
" ], "text/plain": [ "label P2303 P2316\n", "node1 id \n", "P1322 P1322-P2302-Q21510862-85dea891-0 NaN [Normal]\n", "P1327 P1327-P2302-Q21510862-a3c3a094-0 NaN [Normal]\n", "P1382 P1382-P2302-Q21510862-f6bcfecf-0 NaN [Normal]\n", "P1560 P1560-P2302-Q21510862-fabecaeb-0 NaN [Q21502408]\n", "P1639 P1639-P2302-Q21510862-384edcd4-0 NaN [Q21502408]" ] }, "execution_count": 163, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 164, "id": "cellular-canal", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 165, "id": "desperate-poster", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2316
node1
P1322NaN[Normal]
P1327NaN[Normal]
P1382NaN[Normal]
P1560NaN[Q21502408]
P1639NaN[Q21502408]
\n", "
" ], "text/plain": [ "label P2303 P2316\n", "node1 \n", "P1322 NaN [Normal]\n", "P1327 NaN [Normal]\n", "P1382 NaN [Normal]\n", "P1560 NaN [Q21502408]\n", "P1639 NaN [Q21502408]" ] }, "execution_count": 165, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "primary-netherlands", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 166, "id": "pointed-haven", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c5a4cef38c4c441ea32086962d3ff9f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "from tqdm.notebook import tqdm\n",
 "import os.path\n",
 "import os\n",
 "import shutil\n",
 "\n",
 "# cnt counts the properties written so far; fCnt is the index of the shell script being filled.\n",
 "cnt = 0\n",
 "fCnt = 0\n",
 "\n",
 "folderName = 'symmetricConstraint'\n",
 "shellFileSuffix = 'symmConst_Validator_'\n",
 "graph_cache_prefix = 'symm_new_1_1_'\n",
 "\n",
 "# Constraint status (first P2316 value) -> output sub-folder; anything else is treated as 'normal'.\n",
 "statusFolders = {'Q21502408': 'mandatory', 'Q62026391': 'suggestion', 'Normal': 'normal'}\n",
 "\n",
 "# Shell script currently being written; closed explicitly so every script is flushed to disk.\n",
 "fOP = None\n",
 "\n",
 "for prop, constraint in tqdm(dfItemRequires.iterrows()):\n",
 "    prop = str(prop)\n",
 "\n",
 "    # Recompute the sub-folder for every row so a value from a previous row is never reused.\n",
 "    status = constraint['P2316']\n",
 "    sfname = statusFolders.get(status[0], 'normal') if isinstance(status, list) else 'normal'\n",
 "\n",
 "    # Properties without a split claims file are skipped.\n",
 "    claimsFile = \"../../propertiesSplit_Final/claims.\" + prop + \".tsv\"\n",
 "    if not os.path.isfile(claimsFile):\n",
 "        continue\n",
 "\n",
 "    # The query reads the same edges under two aliases (tsv / copy2), so a second copy of the file is made.\n",
 "    # NOTE(review): presumably kgtk derives the aliases from the file names - confirm.\n",
 "    copyFile = \"../../propertiesSplit_Final/claims.\" + prop + \".copy2.tsv\"\n",
 "    shutil.copyfile(claimsFile, copyFile)\n",
 "\n",
 "    # Start a new shell script every 60 properties, closing the previous one first.\n",
 "    if cnt % 60 == 0:\n",
 "        if fOP is not None:\n",
 "            fOP.close()\n",
 "        fCnt += 1\n",
 "        fOP = open(\"../../propertiesSplit_Final/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\", \"w\")\n",
 "\n",
 "    outPrefix = \"../../allConstraintsAnalysis_Final/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".\"\n",
 "    graphCache = \"~/sqlite3_caches/\" + graph_cache_prefix + \"_\" + str(fCnt) + \".sqlite3.db\"\n",
 "    logFile = \"../../propertiesSplit_Final/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt\"\n",
 "\n",
 "    # With exceptions (P2303) the first pass writes *_wo_exceptions files that are refined below.\n",
 "    excptns = constraint['P2303']\n",
 "    hasExceptions = isinstance(excptns, list)\n",
 "    suffix = \"_wo_exceptions\" if hasExceptions else \"\"\n",
 "\n",
 "    steps = [\n",
 "        # Statements that also occur in the reverse direction are written to the 'correct' file.\n",
 "        \"kgtk --debug query -i \" + claimsFile + \" \" + copyFile\n",
 "        + \" --match 'tsv: (node1)-[nodeProp]->(node2), copy2: (node2)-[]->(node1)'\"\n",
 "        + \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`'\"\n",
 "        + \" -o \" + outPrefix + \"correct\" + suffix + \".tsv\"\n",
 "        + \" --graph-cache \" + graphCache,\n",
 "        # Every other statement of the property goes to the 'incorrect' file.\n",
 "        \"kgtk --debug ifnotexists -i \" + claimsFile\n",
 "        + \" --filter-on \" + outPrefix + \"correct\" + suffix + \".tsv\"\n",
 "        + \" -o \" + outPrefix + \"incorrect\" + suffix + \".tsv\",\n",
 "    ]\n",
 "\n",
 "    if hasExceptions:\n",
 "        steps += [\n",
 "            # Candidate violations whose node1 is a listed exception are moved to the correct side.\n",
 "            \"kgtk --debug query -i \" + outPrefix + \"incorrect_wo_exceptions.tsv\"\n",
 "            + \" --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\", '\"') + \"'\"\n",
 "            + \" -o \" + outPrefix + \"correct_exceptions.tsv\"\n",
 "            + \" --graph-cache \" + graphCache,\n",
 "            \"kgtk --debug ifnotexists -i \" + outPrefix + \"incorrect_wo_exceptions.tsv\"\n",
 "            + \" --filter-on \" + outPrefix + \"correct_exceptions.tsv\"\n",
 "            + \" -o \" + outPrefix + \"incorrect.tsv\",\n",
 "            \"kgtk cat -i \" + outPrefix + \"correct_wo_exceptions.tsv \" + outPrefix + \"correct_exceptions.tsv\"\n",
 "            + \" -o \" + outPrefix + \"correct.tsv\",\n",
 "        ]\n",
 "\n",
 "    # One timed line per property; stderr of the whole group is appended to the script's log file.\n",
 "    fOP.write(\"{ time ( \" + \"; \".join(steps) + \" ) } 2>> \" + logFile + \";\\n\")\n",
 "    cnt += 1\n",
 "\n",
 "# Close the last script so its buffered contents reach disk before the scripts are launched.\n",
 "if fOP is not None:\n",
 "    fOP.close()"
 ] }, { "cell_type": "code", "execution_count": 167, "id": "polar-canada", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "38" ] }, "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"cnt" ] }, { "cell_type": "code", "execution_count": 168, "id": "virtual-disney", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,3):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_Final/checkViolations/symmConst_Validator_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "coral-cheese", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 108, "id": "governmental-backup", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "51714985d51049f9b20d98517ba65416", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7eac3a245b804992ae3c65c43665e227", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/13 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2152751[../../allConstraintsAnalysis_Final/symmetricC...0.013158
P1639205921[../../allConstraintsAnalysis_Final/symmetricC...0.010096
P1560323520[../../allConstraintsAnalysis_Final/symmetricC...0.006144
P336417840[../../allConstraintsAnalysis_Final/symmetricC...0.000000
P61852790[../../allConstraintsAnalysis_Final/symmetricC...0.000000
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P2152 75 1 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P1639 2059 21 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P1560 3235 20 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P3364 1784 0 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P6185 279 0 [../../allConstraintsAnalysis_Final/symmetricC... \n", "\n", " violation_ratio \n", "P2152 0.013158 \n", "P1639 0.010096 \n", "P1560 0.006144 \n", "P3364 0.000000 \n", "P6185 0.000000 " ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF1 = pd.DataFrame(symmConstViolations['mandatory']).T\n", "symmConstDF1['violation_ratio'] = symmConstDF1.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF1.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 102, "id": "gross-extraction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P278910353622758[../../allConstraintsAnalysis_Final/symmetricC...0.180199
P188950583728391[../../allConstraintsAnalysis_Final/symmetricC...0.053144
P1971643493077[../../allConstraintsAnalysis_Final/symmetricC...0.018378
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2789 103536 22758 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P1889 505837 28391 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P197 164349 3077 [../../allConstraintsAnalysis_Final/symmetricC... \n", "\n", " violation_ratio \n", "P2789 0.180199 \n", "P1889 0.053144 \n", "P197 0.018378 " ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF2 = pd.DataFrame(symmConstViolations['suggestion']).T\n", "symmConstDF2['violation_ratio'] = symmConstDF2.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 103, "id": "heavy-scout", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P518802[../../allConstraintsAnalysis_Final/symmetricC...1.000000
P1706452[../../allConstraintsAnalysis_Final/symmetricC...0.928571
P5214181300[../../allConstraintsAnalysis_Final/symmetricC...0.756694
P2652460852[../../allConstraintsAnalysis_Final/symmetricC...0.649390
P870262[../../allConstraintsAnalysis_Final/symmetricC...0.250000
P1382106222811[../../allConstraintsAnalysis_Final/symmetricC...0.209261
P229399802230[../../allConstraintsAnalysis_Final/symmetricC...0.182637
P30321674345[../../allConstraintsAnalysis_Final/symmetricC...0.170877
P45195201018[../../allConstraintsAnalysis_Final/symmetricC...0.096603
P13277734758[../../allConstraintsAnalysis_Final/symmetricC...0.089260
P491532828[../../allConstraintsAnalysis_Final/symmetricC...0.078652
P34032126114[../../allConstraintsAnalysis_Final/symmetricC...0.050893
P4754825129281[../../allConstraintsAnalysis_Final/symmetricC...0.050700
P46023896711571[../../allConstraintsAnalysis_Final/symmetricC...0.046185
P5306595319[../../allConstraintsAnalysis_Final/symmetricC...0.046138
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5188 0 2 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P1706 4 52 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P521 418 1300 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P2652 460 852 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P8702 6 2 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P1382 10622 2811 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P2293 9980 2230 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P3032 1674 345 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P451 9520 1018 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P1327 7734 758 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P4915 328 28 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P3403 2126 114 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P47 548251 29281 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P460 238967 11571 [../../allConstraintsAnalysis_Final/symmetricC... \n", "P530 6595 319 [../../allConstraintsAnalysis_Final/symmetricC... 
\n", "\n", " violation_ratio \n", "P5188 1.000000 \n", "P1706 0.928571 \n", "P521 0.756694 \n", "P2652 0.649390 \n", "P8702 0.250000 \n", "P1382 0.209261 \n", "P2293 0.182637 \n", "P3032 0.170877 \n", "P451 0.096603 \n", "P1327 0.089260 \n", "P4915 0.078652 \n", "P3403 0.050893 \n", "P47 0.050700 \n", "P460 0.046185 \n", "P530 0.046138 " ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF3 = pd.DataFrame(symmConstViolations['normal']).T\n", "symmConstDF3['violation_ratio'] = symmConstDF3.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 105, "id": "legitimate-aspect", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Symmetric Normal Constraint - Violation Ratios')" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "symmConstDF3.sort_values(by=['violation_ratio'],ascending=False)['violation_ratio'].plot.hist(bins=100).set_title(\"Symmetric Normal Constraint - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 106, "id": "junior-marketing", "metadata": {}, "outputs": [], "source": [ "pd.concat([symmConstDF1, symmConstDF2, symmConstDF3]).to_csv('../../allConstraintsAnalysis_Final/symmConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "unlikely-sewing", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 135, "id": "southern-reasoning", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a6d9bf7b68ec4aa6a5403175de15cd14", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/52 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for symmetric constraint checks\")" ] }, { "cell_type": "markdown", "id": "informed-animal", "metadata": {}, "source": [ "## Inverse Constraint (Q21510855)\n", "\n", "This constraint says that if node1 has a statement using a property that carries this constraint, then the value of that statement (node2) must in turn have a statement using the specified inverse property (given by the P2306 qualifier) whose value is node1."
] }, { "cell_type": "markdown", "id": "dramatic-manchester", "metadata": {}, "source": [ "### Constraints File" ] }, { "cell_type": "code", "execution_count": 169, "id": "leading-server", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-15 03:26:20 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " AND graph_1_c1.\"node2\"=?\r\n", " PARAS: ['P2302', 'Q21510855']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->(:Q21510855)\" \\\n", " -o ../../constraintsOP/inverseConstraint/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 171, "id": "received-colonial", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "qualiDF = pd.read_csv(\"../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz\",sep='\\t')\n", "constDF = pd.read_csv(\"../../constraintsOP/inverseConstraint/claims.constraints_list.tsv\",sep='\\t')" ] }, { "cell_type": "code", "execution_count": 172, "id": "overall-expense", "metadata": {}, "outputs": [], "source": [ "constDF2 = constDF.set_index('id').join(qualiDF.set_index('node1'),rsuffix='_qualifier').drop(columns=['id', 'node2;wikidatatype_qualifier', 'rank', 'node2', 'label', 'node2;wikidatatype']).rename(columns={'label_qualifier':'label', 'node2_qualifier': 'node2'})\n", "constDF2 = constDF2.reset_index()\n", "constDF2 = constDF2.rename(columns={'index':'id'})\n", "constDF2['label'] = constDF2.label.fillna(\"P2316\")\n", "constDF2['node2'] = constDF2.node2.fillna(\"Normal\")" ] }, { "cell_type": "code", "execution_count": 173, "id": 
"valid-throat", "metadata": {}, "outputs": [], "source": [ "constDF2.to_csv(\"../../constraintsOP/inverseConstraint/claims.constraints_all.tsv\",sep=\"\\t\",index=False)" ] }, { "cell_type": "code", "execution_count": 174, "id": "focused-pennsylvania", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/inverseConstraint/claims.constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 175, "id": "moved-rental", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 176, "id": "attached-rings", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2306', 'P2316', 'P4155', 'P2303'], dtype=object)" ] }, "execution_count": 176, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 177, "id": "loving-mileage", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2306 110\n", "P2316 10\n", "P2303 2\n", "P4155 1\n", "Name: label, dtype: int64" ] }, "execution_count": 177, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 178, "id": "local-forty", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 179, "id": "pressed-upset", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2306P2316P4155
node1id
P1026P1026-P2302-Q21510855-adc83b86-0NaN[P50]NaNNaN
P1029P1029-P2302-Q21510855-6b55e057-0NaN[P5096]NaNNaN
P115P115-P2302-Q21510855-f7aa0b78-0NaN[P466]NaNNaN
P1151P1151-P2302-Q21510855-0d9aa9c6-0NaN[P1204][Q21502408]NaN
P1204P1204-P2302-Q21510855-e3d53bb6-0NaN[P1151]NaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2306 P2316 P4155\n", "node1 id \n", "P1026 P1026-P2302-Q21510855-adc83b86-0 NaN [P50] NaN NaN\n", "P1029 P1029-P2302-Q21510855-6b55e057-0 NaN [P5096] NaN NaN\n", "P115 P115-P2302-Q21510855-f7aa0b78-0 NaN [P466] NaN NaN\n", "P1151 P1151-P2302-Q21510855-0d9aa9c6-0 NaN [P1204] [Q21502408] NaN\n", "P1204 P1204-P2302-Q21510855-e3d53bb6-0 NaN [P1151] NaN NaN" ] }, "execution_count": 179, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 180, "id": "extra-stomach", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 181, "id": "seeing-marine", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2306P2316P4155
node1
P1026NaN[P50]NaNNaN
P1029NaN[P5096]NaNNaN
P115NaN[P466]NaNNaN
P1151NaN[P1204][Q21502408]NaN
P1204NaN[P1151]NaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2306 P2316 P4155\n", "node1 \n", "P1026 NaN [P50] NaN NaN\n", "P1029 NaN [P5096] NaN NaN\n", "P115 NaN [P466] NaN NaN\n", "P1151 NaN [P1204] [Q21502408] NaN\n", "P1204 NaN [P1151] NaN NaN" ] }, "execution_count": 181, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "composite-cutting", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 182, "id": "acoustic-belarus", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "15f80aaf576c45f5bdcd3759e7df4644", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
"# Writes shell scripts (60 properties per script) that run the kgtk inverse-constraint check\n",
"# for every property in dfItemRequires whose claims file and inverse-property claims file exist.\n",
"from tqdm.notebook import tqdm\n",
"import os.path\n",
"import os\n",
"\n",
"cnt = 0\n",
"fCnt = 0\n",
"fOP = None\n",
"\n",
"folderName = 'inverseConstraint'\n",
"shellFileSuffix = 'invConst_Validator_'\n",
"graph_cache_file_prefix = \"inv_new_1_1_\"\n",
"\n",
"# First value of the P2316 qualifier -> output sub-folder; anything else falls back to 'normal'.\n",
"statusFolders = {'Q21502408': 'mandatory', 'Q62026391': 'suggestion'}\n",
"returnClause = \"--return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`'\"\n",
"\n",
"for prop, constraint in tqdm(dfItemRequires.iterrows()):\n",
"\n",
"    # Resolve the sub-folder afresh for every property so that an unrecognised P2316 value\n",
"    # (for example the 'Normal' filler) falls back to 'normal' instead of reusing the folder\n",
"    # chosen for the previous property.\n",
"    status = constraint['P2316']\n",
"    if isinstance(status, list) and status:\n",
"        subFolderName = statusFolders.get(status[0], 'normal')\n",
"    else:\n",
"        subFolderName = 'normal'\n",
"\n",
"    claims = '../../propertiesSplit_Final/claims.' + prop + '.tsv'\n",
"    if not os.path.isfile(claims):\n",
"        continue\n",
"\n",
"    # P2306 supplies the property expected on the reverse edge; only its first value is checked.\n",
"    prop2 = constraint['P2306']\n",
"    if type(prop2) != list:\n",
"        continue\n",
"    prop2 = prop2[0]\n",
"\n",
"    inverseClaims = '../../propertiesSplit_Final/claims.' + prop2 + '.tsv'\n",
"    if not os.path.isfile(inverseClaims):\n",
"        print(f\"File: ../../propertiesSplit_Final/claims.{prop2}.tsv does not exist\")\n",
"        continue\n",
"\n",
"    # Start a new shell script every 60 properties, closing the previous one so it is flushed.\n",
"    if cnt % 60 == 0:\n",
"        if fOP:\n",
"            fOP.close()\n",
"        fCnt += 1\n",
"        fOP = open('../../propertiesSplit_Final/checkViolations/' + shellFileSuffix + str(fCnt) + '.sh', 'w')\n",
"\n",
"    outPrefix = '../../allConstraintsAnalysis_Final/' + folderName + '/' + subFolderName + '/claims.' + prop\n",
"    graphCache = '--graph-cache ~/sqlite3_caches/' + str(graph_cache_file_prefix) + str(fCnt) + '.sqlite3.db'\n",
"    # Closes the `{ time ( ... ) }` group and appends its stderr to the per-script log file.\n",
"    logRedirect = ' ) } 2>> ../../propertiesSplit_Final/checkViolations/exec_logs/' + shellFileSuffix + str(fCnt) + '.txt;\\n'\n",
"    inverseQuery = ('{ time ( kgtk --debug query -i ' + claims + ' ' + inverseClaims\n",
"                    + \" --match '\" + f\"{prop}: (node1)-[nodeProp]->(node2), {prop2}: (node2)-[]->(node1)' \"\n",
"                    + returnClause)\n",
"\n",
"    if type(constraint['P2303']) != list:  # Exceptions not present\n",
"        steps = [\n",
"            inverseQuery + ' -o ' + outPrefix + '.correct.tsv ' + graphCache,\n",
"            'kgtk --debug ifnotexists -i ' + claims + ' --filter-on ' + outPrefix + '.correct.tsv -o ' + outPrefix + '.incorrect.tsv',\n",
"        ]\n",
"    else:\n",
"        # De-duplicate and sort the exceptions so regenerated scripts are identical between runs.\n",
"        excptns = str(sorted(set(constraint['P2303']))).replace(\"'\", '\"')\n",
"        incorrectWoExc = outPrefix + '.incorrect_wo_exceptions.tsv'\n",
"        steps = [\n",
"            inverseQuery + ' -o ' + outPrefix + '.correct_wo_exceptions.tsv ' + graphCache,\n",
"            'kgtk --debug ifnotexists -i ' + claims + ' --filter-on ' + outPrefix + '.correct_wo_exceptions.tsv -o ' + incorrectWoExc,\n",
"            # Edges whose node1 is listed in P2303 are moved from the incorrect set to the correct set.\n",
"            'kgtk --debug query -i ' + incorrectWoExc + \" --match '(node1)-[]->()' --where 'node1 in \" + excptns + \"' -o \" + outPrefix + '.incorrect_w_exceptions.tsv ' + graphCache,\n",
"            'kgtk --debug ifnotexists -i ' + incorrectWoExc + ' --filter-on ' + outPrefix + '.incorrect_w_exceptions.tsv -o ' + outPrefix + '.incorrect.tsv',\n",
"            'kgtk cat -i ' + outPrefix + '.correct_wo_exceptions.tsv ' + outPrefix + '.incorrect_w_exceptions.tsv -o ' + outPrefix + '.correct.tsv',\n",
"        ]\n",
"\n",
"    fOP.write('; '.join(steps) + logRedirect)\n",
"    cnt += 1\n",
"# except:\n",
"# print(\"Something failed for
prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 183, "id": "large-climb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "110" ] }, "execution_count": 183, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 184, "id": "involved-vietnamese", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,7):\n", "# os.system(\"screen -dm sh ../../propertiesSplit_Final/checkViolations/invConst_Validator_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "retired-audio", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 113, "id": "specified-evanescence", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e2f270487d664d179f20852ca889bea2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "176eec50bfb740d29687850719d89ed6", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/12 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P267381969[../../allConstraintsAnalysis_Final/inverseCon...0.077703
P41472719[../../allConstraintsAnalysis_Final/inverseCon...0.032143
P41492724[../../allConstraintsAnalysis_Final/inverseCon...0.014493
P2033183626[../../allConstraintsAnalysis_Final/inverseCon...0.013963
P450175421[../../allConstraintsAnalysis_Final/inverseCon...0.011831
P115116124[../../allConstraintsAnalysis_Final/inverseCon...0.002475
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P2673 819 69 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P4147 271 9 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P4149 272 4 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P2033 1836 26 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P450 1754 21 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P1151 1612 4 [../../allConstraintsAnalysis_Final/inverseCon... \n", "\n", " violation_ratio \n", "P2673 0.077703 \n", "P4147 0.032143 \n", "P4149 0.014493 \n", "P2033 0.013963 \n", "P450 0.011831 \n", "P1151 0.002475 " ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF1 = pd.DataFrame(invConstViolations['mandatory']).T\n", "invConstDF1['violation_ratio'] = invConstDF1.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF1.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 119, "id": "valid-symposium", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P143435124745[../../allConstraintsAnalysis_Final/inverseCon...0.574664
P15596320549783[../../allConstraintsAnalysis_Final/inverseCon...0.049145
P15696318340682[../../allConstraintsAnalysis_Final/inverseCon...0.040525
P62972202141[../../allConstraintsAnalysis_Final/inverseCon...0.001949
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1434 3512 4745 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P155 963205 49783 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P156 963183 40682 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P629 72202 141 [../../allConstraintsAnalysis_Final/inverseCon... \n", "\n", " violation_ratio \n", "P1434 0.574664 \n", "P155 0.049145 \n", "P156 0.040525 \n", "P629 0.001949 " ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF2 = pd.DataFrame(invConstViolations['suggestion']).T\n", "invConstDF2['violation_ratio'] = invConstDF2.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 120, "id": "resident-mustang", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P160512189[../../allConstraintsAnalysis_Final/inverseCon...0.940299
P34485834266[../../allConstraintsAnalysis_Final/inverseCon...0.879769
P92616[../../allConstraintsAnalysis_Final/inverseCon...0.857143
P92515[../../allConstraintsAnalysis_Final/inverseCon...0.833333
P10294752263[../../allConstraintsAnalysis_Final/inverseCon...0.826516
P115671224515[../../allConstraintsAnalysis_Final/inverseCon...0.785058
P8625717[../../allConstraintsAnalysis_Final/inverseCon...0.708333
P51328189[../../allConstraintsAnalysis_Final/inverseCon...0.523529
P42525122024[../../allConstraintsAnalysis_Final/inverseCon...0.446208
P38161410[../../allConstraintsAnalysis_Final/inverseCon...0.416667
P2512210149[../../allConstraintsAnalysis_Final/inverseCon...0.415042
P167764[../../allConstraintsAnalysis_Final/inverseCon...0.400000
P2578989604[../../allConstraintsAnalysis_Final/inverseCon...0.379159
P3261232125[../../allConstraintsAnalysis_Final/inverseCon...0.350140
P5681049450[../../allConstraintsAnalysis_Final/inverseCon...0.300200
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1605 12 189 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P3448 583 4266 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P926 1 6 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P925 1 5 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P1029 475 2263 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P115 6712 24515 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P8625 7 17 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P5132 81 89 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P425 2512 2024 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P3816 14 10 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P2512 210 149 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P1677 6 4 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P2578 989 604 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P3261 232 125 [../../allConstraintsAnalysis_Final/inverseCon... \n", "P568 1049 450 [../../allConstraintsAnalysis_Final/inverseCon... 
\n", "\n", " violation_ratio \n", "P1605 0.940299 \n", "P3448 0.879769 \n", "P926 0.857143 \n", "P925 0.833333 \n", "P1029 0.826516 \n", "P115 0.785058 \n", "P8625 0.708333 \n", "P5132 0.523529 \n", "P425 0.446208 \n", "P3816 0.416667 \n", "P2512 0.415042 \n", "P1677 0.400000 \n", "P2578 0.379159 \n", "P3261 0.350140 \n", "P568 0.300200 " ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF3 = pd.DataFrame(invConstViolations['normal']).T\n", "invConstDF3['violation_ratio'] = invConstDF3.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 122, "id": "entire-gauge", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Symmetric Normal Constraint - Violation Ratios')" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEICAYAAABYoZ8gAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAZmklEQVR4nO3debhddX3v8feHhNEEAuSQMnoEGQyCwA1gHSoWVAYZ1NYaGwQvEMdevNBWirQgWsWrDF5bBRSeBFBkUkwFWwGJqZbBoIgMUhACZIAENCTBCAa+/eP3O2GxOfucdU7O2js5v8/rec5z1ry+a/rstdZee29FBGZmVo71ul2AmZl1loPfzKwwDn4zs8I4+M3MCuPgNzMrjIPfzKwwDv5CSDpf0j92u46RJmm2pOO7XcdIW9u2l6QfSDqmxnC9kkLS2GHO51RJ3xjOuJ0kaQdJKySN6XYtw+Hgr5D0Jkn/JelpSb+V9FNJ+3a7roFIOlbSTwYbLiI+HBGfGcb050laLOkVlW7HS5o91Gl1g6RdJF0l6cm8Xe+SdFKTB2xeZwetyTSGsr0kzZD02eHOK7/IXNJP99dJelbSFhFxSETMHO482sz3AEnzq90i4nMRMeIv5Pk4eT6H9TJJv5T0ziGM/5JtGhGPRsS4iHh+pGvtBAd/JmlT4PvAV4AtgG2BTwPPdrOukTACITcGOHEE6pCkju1zknYCbgMeA/aIiM2AvwSmAOM7VUc/dQ3rbLhBM4F3V1/cs6OB70fEb7tQUxNuiYhxwATgq8C3JU3oakXdEhH+S59engIsbdNvA+C3pPDo67YV8HugBzgAmA/8PbAYWAQcBRwK/Hce99TKuGcAVwGXAcuBXwG7AP+Qx38MeHtl+M2Ai/J0FwCfJYXxa4A/AM8DK/rqB2YAXwOuB54BDsrdPluZ5pHAncAy4DfAwW2WfR5wSl6GCbnb8cDsyjBvAH4GPJ3/v6HSbzbwz8BPgZXAq4EAPgo8kJf/M8BOwH/
leq4ENsjjb056QV4C/C43b9cy/ePb1H4ZcN0g2/0I4B5gaZ7Wa1qW/W+Bu/KyXQFslPtNzLUszevmP0knUpcCL+RlXZH3id68zMcBjwJz8jSuAh7P054D7F6Z9+rtxYv718m8uH99MPebDvwReC7P79+Guf/fD3yg0j4GWAgc2bqe83KeBjyS67kE2Cz361vWsbn9g8B9eTs/BHwod39FXkcv5LpXANuQjo3L1nT79LN8xwI/qbRvkuvcN7fvBPwIeAp4EvgmL+7vA23TvuXcBpiV94UHgRMq89oPmEvat58Azul63nW7gLXlD9g0b/SZwCHA5i39vwp8odJ+Yt9Blg/MVcA/AesDJ5CC6lukM8vd807zqjz8GaTAfgcwNh84DwOfqoz/cGVe3wUuyAfLVsDtlQPoJTt07jYjHwhvJB2kG/HSINkv939b7r8tsFub9TKP9MLxncr4q4OfdHX0O9LZ4Vhgam7fMvefTQq73XP/9fMB8728zncnXVXdBOxIepG7Fzgmj78l8B7SgTqeFJbXVuqbTfvgf5wckG3670J6YXxbruvv80G7QWXZbycd1FuQAuzDud/ngfPzeOsDbwZUXWeV+fTmZb4kb8ONc/f/nZdpQ+A84M6WbVgN/lXAmXleh5JOOjZvHXYN9v9PATdW2t9B2ofXb13Pue4H8/Yal/eNS1uWtS8QDyOFqoC35Lr3qSzX/JY6ziAH/5psn36W71jycUJ6UfsY6cVyq9zt1Xk+G5JO5uYA57UeB/1s077lnEPKiI2AvfK6+/Pc7xbg6Nw8Dnh91/Ou2wWsTX+kM+gZpLOrVaRX8Em53/6kAOs7uOcC763swCuBMbl9fN4p9q9M+w7gqMrOfUOl3+GkM4nW8ScAk0jBuHFl+KnAza07dKX/DOCSfrr1BckFwLk118k8UvC/lvRi0cNLg/9o4PaWcW4Bjs3Ns4EzW/oH8MaWdfPJSvvZ1YOuZdy9gN9V2mfTPvj/SJsrmdz/H4ErK+3rka6oDqgs+7RK//8HnJ+bzyS9eL263TqrtPfmZd5xgFom5GE262d79e1fYyvDLyYHCCMT/Dvk9bVdbv8m8OX+1jPpRfqjlX675nHH0hKI/cznWuDEynINFPzD3j79zPdY0jG9NNe6knz8thn+KOAXNbbpWGB70lX3+Er/zwMzcvMc0m3jiWuyjUbyz/f4KyLivog4NiK2IwXdNqQzMSLiNtLZygGSdiOdIcyqjP5UvPhGz8r8/4lK/5WkV3va9Huyn/HHAa8kne0skrRU0lJScG81yOI8NkC/7Um3d2qLiLtJtzZOaem1DemSv+oR0lXEQLW0Ln+/60rSJpIukPSIpGWkg2hCzfctngK2HqD/S2qPiBdyrdXaH680/54Xt+EXSWefP5T0kKTW9dKf1etB0hhJZ0n6TV6uebnXxHbLEhGr2tQyoPykzIr8d35/w0TEo6R1O03SOFLwvewN36x1mz9CCsBJ/cz7EEm35ocllpKuVtot44DzGeL26c+tETGBdPtwFukqra/OSZK+LWlB3h6XDbHO30bE8kq36jFwHOnq5deSfjaUN5Wb4uBvIyJ+TTqTem2l80xgGuks9+qI+EMHSnmMdMY/MSIm5L9NI2L3vlLbjNeue980dxpGLaeTbkNVD7yFpBenqh1IZ2Z1ahnMyaQzyv0jYlPgz3J31Rj3RtJtonZeUrskkV4UF7QdI4uI5RFxckTsSLoPfZKkA/t6txut0vx+0vssB5Fub/X2lTHYvAeZbn+1fi7SEyjjIuLDAww6k7Rvv4d0q/GONsO1bvMdSGfT1RdvJG0IXAN8iXTlPIH0vlPfMg62Xwx7+wwkIlYAHwGOlrR37vy5XM8eeT+bxku3xUC1LgS2kFR9YGD1MRARD0TEVNLJ2heAq/t5I72jHPyZpN0knSxpu9y+PemWyq2VwS4D3kXaKdqdDY2oiFgE/BA4W9KmktaTtJOkt+RBngC2k7TBECZ7EfBBSQfm6W2
br2IGq+VB0hto/6fS+XpgF0nvlzRW0l8Bk0lXByNhPOkKYKmkLUgvPnWdDrxB0hcl/QmApFdLuiw/zXElcFheD+uTXmSeJb3JPCBJ78zTEukW2POkNwAhbZMdayzXs6Srkk1IwTNcdeZXxzWkwPo06UWgncuB/yvpVfnq4HPAFS1XJJAeitiQdL97laRDgLe31L2lpM3azGfY22cwkZ5U+gbpfTlI22MF8LSkbYG/axml7TqOiMdyTZ+XtJGkPUln+ZcBSJomqSdfsSzNo73Q37Q6xcH/ouWk+/i3SXqGFPh3k3Y2YPUG/jnp1f8/O1jbB0gH0b2kN06v5sVbGD8iPfXwuKQn60wsIm4nPW1xLim0fszLz9rbOZP0BmXftJ4C3klaT0+R3oB7Z0TUqqWG84CNSU9a3Ar8e90RI+I3wJ+SzqbvkfQ0KdzmAssj4n7Si/hX8vQPBw6PiOdqTH5n0hXFCtJ7Gl+NiJtzv88Dp+Vbc3/bZvxLSLcDFpC2661thqvjImBynt+1w51IRDxDWj/bke7xt3Mx6UmXOaSHEv4A/E0/01tOOkm4krTfvp/K7dF8VX058FCufZuW8ddk+9RxHnBoDupPA/uQjofrSG9YVw22TaeS9rOFpIcxTo+IG3O/g0n73wrgy8D7ImJlP9PomL43Kq0mSRcDCyPitG7XYmY2HGvbB0nWapJ6gXcDew8yqJnZWsu3emqS9BnSrZ8vRsTD3a7HzGy4fKvHzKwwPuM3MyvMOnGPf+LEidHb29vtMszM1il33HHHkxHR09p9nQj+3t5e5s6d2+0yzMzWKZJaP1UP+FaPmVlxHPxmZoVx8JuZFcbBb2ZWGAe/mVlhHPxmZoVx8JuZFcbBb2ZWGAe/mVlh1olP7q6J3lOuW90876zDuliJmdnawWf8ZmaFcfCbmRXGwW9mVhgHv5lZYRoLfknbS7pZ0r2S7pF0Yu5+hqQFku7Mf4c2VYOZmb1ck0/1rAJOjoifSxoP3CHphtzv3Ij4UoPzNjOzNhoL/ohYBCzKzcsl3Qds29T8zMysno7c45fUC+wN3JY7fVzSXZIulrR5m3GmS5orae6SJUs6UaaZWREaD35J44BrgE9ExDLga8BOwF6kK4Kz+xsvIi6MiCkRMaWn52U/GWlmZsPUaPBLWp8U+t+MiO8ARMQTEfF8RLwAfB3Yr8kazMzspZp8qkfARcB9EXFOpfvWlcHeBdzdVA1mZvZyTT7V80bgaOBXku7M3U4FpkraCwhgHvChBmswM7MWTT7V8xNA/fS6vql5mpnZ4PzJXTOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK01jwS9pe0s2S7pV0j6QTc/ctJN0g6YH8f/OmajAzs5dr8ox/FXByREwGXg98TNJk4BTgpojYGbgpt5uZWYc0FvwRsSgifp6blwP3AdsCRwIz82AzgaOaqsHMzF6uI/f4JfUCewO3AZMiYlHu9TgwqRM1mJlZ0njwSxoHXAN8IiKWVftFRADRZrzpkuZKmrtkyZKmyzQzK0ajwS9pfVLofzMivpM7PyFp69x/a2Bxf+NGxIURMSUipvT09DRZpplZUZp8qkfARcB9EXFOpdcs4JjcfAzwvaZqMDOzlxvb4LTfCBwN/ErSnbnbqcBZwJWSjgMeAd7bYA1mZtaiseCPiJ8AatP7wKbma2ZmA/Mnd83MCuPgNzMrjIPfzKwwDn4zs8I4+M3MCuPgNzMrjIPfzKwwDn4zs8I4+M3MCuPgNzMrjIPfzKwwDn4zs8I4+M3MCuPgNzMrjIPfzKwwDn4zs8I4+M3MCuPgNzMrjIPfzKwwDn4
zs8I4+M3MCuPgNzMrTK3gl7RH04WYmVln1D3j/6qk2yV9VNJmjVZkZmaNqhX8EfFm4K+B7YE7JH1L0tsarczMzBpR+x5/RDwAnAZ8EngL8P8l/VrSu5sqzszMRl7de/x7SjoXuA/4c+DwiHhNbj63wfrMzGyEja053FeAbwCnRsTKvo4RsVDSaY1UZmZmjagb/IcBKyPieQBJ6wEbRcTvI+LSxqozM7MRV/ce/43AxpX2TXI3MzNbx9QN/o0iYkVfS27epJmSzMysSXWD/xlJ+/S1SPpfwMoBhkfSxZIWS7q70u0MSQsk3Zn/Dh1e2WZmNlx17/F/ArhK0kJAwJ8AfzXIODOAfwEuael+bkR8aQg1mpnZCKoV/BHxM0m7AbvmTvdHxB8HGWeOpN41rM/MzEbYUL6kbV9gT2AfYKqkDwxznh+XdFe+FbR5u4EkTZc0V9LcJUuWDHNWZmbWqu4HuC4FvgS8ifQCsC8wZRjz+xqwE7AXsAg4u92AEXFhREyJiCk9PT3DmJWZmfWn7j3+KcDkiIg1mVlEPNHXLOnrwPfXZHpmZjZ0dW/13E16Q3eNSNq60vquPF0zM+ugumf8E4F7Jd0OPNvXMSKOaDeCpMuBA4CJkuYDpwMHSNoLCGAe8KFhVW1mZsNWN/jPGOqEI2JqP50vGup0zMxsZNV9nPPHkl4J7BwRN0raBBjTbGlmZtaEuk/1nABcDVyQO20LXNtQTWZm1qC6t3o+BuwH3AbpR1kkbdVYVQ3pPeW61c3zzjqsi5WYmXVP3ad6no2I5/paJI0lvUFrZmbrmLrB/2NJpwIb59/avQr4t+bKMjOzptQN/lOAJcCvSI9gXk/6/V0zM1vH1H2q5wXg6/nPzMzWYbWCX9LD9HNPPyJ2HPGKzMysUUP5rp4+GwF/CWwx8uWYmVnTat3jj4inKn8LIuI80g+wm5nZOqburZ59Kq3rka4A6l4tmJnZWqRueFe/N38V6QvW3jvi1ZiZWePqPtXz1qYLMTOzzqh7q+ekgfpHxDkjU46ZmTVtKE/17AvMyu2HA7cDDzRRlJmZNadu8G8H7BMRywEknQFcFxHTmirMzMyaUfcrGyYBz1Xan8vdzMxsHVP3jP8S4HZJ383tRwEzG6nIzMwaVfepnn+W9APgzbnTByPiF82VZWZmTal7qwdgE2BZRHwZmC/pVQ3VZGZmDar704unA58E/iF3Wh+4rKmizMysOXXP+N8FHAE8AxARC4HxTRVlZmbNqRv8z0VEkL+aWdIrmivJzMyaVDf4r5R0ATBB0gnAjfhHWczM1kmDPtUjScAVwG7AMmBX4J8i4oaGazMzswYMGvwREZKuj4g9AIe9mdk6ru6tnp9L2rfRSszMrCPqfnJ3f2CapHmkJ3tEuhjYs6nCzMysGQMGv6QdIuJR4B0dqsfMzBo22Bn/taRv5XxE0jUR8Z4O1GRmZg0a7B6/Ks07NlmImZl1xmDBH22aByXpYkmLJd1d6baFpBskPZD/bz6UaZqZ2ZobLPhfJ2mZpOXAnrl5maTlkpYNMu4M4OCWbqcAN0XEzsBNud3MzDpowHv8ETFmuBOOiDmSels6HwkckJtnArNJX/5mZmYdMpSvZR4JkyJiUW5+nAF+xUvSdElzJc1dsmRJZ6ozMytAp4N/teqXvrXpf2FETImIKT09PR2szMxsdOt08D8haWuA/H9xh+dvZla8Tgf/LOCY3HwM8L0Oz9/MrHiNBb+ky4FbgF0lzZd0HHAW8DZJDwAH5XYzM+ugut/VM2QRMbVNrwObmqeZmQ2ua2/umplZdzj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwK4+A3MyuMg9/MrDAOfjOzwjj4zcwKM7bbBYw2vadct7p53lmHdbE
SM7P++YzfzKwwDn4zs8I4+M3MCuPgNzMrTFfe3JU0D1gOPA+siogp3ajDzKxE3Xyq560R8WQX529mViTf6jEzK0y3gj+AH0q6Q9L0/gaQNF3SXElzlyxZ0uHyzMxGr24F/5siYh/gEOBjkv6sdYCIuDAipkTElJ6ens5XaGY2SnUl+CNiQf6/GPgusF836jAzK1HHg1/SKySN72sG3g7c3ek6zMxK1Y2neiYB35XUN/9vRcS/d6EOM7MidTz4I+Ih4HWdnq+ZmSV+nNPMrDAOfjOzwvj7+Gvy9+yb2WjhM34zs8I4+M3MCuPgNzMrjIPfzKwwDn4zs8I4+M3MCuPgNzMrjJ/jb1Hnef3qMAMN10Qd/jyBma0pn/GbmRXGwW9mVhgHv5lZYRz8ZmaFcfCbmRXGwW9mVhgHv5lZYYp9jn8kn4dvfa5/KMOs68/iD7Qe1+bPHKzNtZk1zWf8ZmaFcfCbmRXGwW9mVhgHv5lZYRz8ZmaFcfCbmRXGwW9mVphin+OvaveMfZ3n84cz3ZEaZk0+G1Dn+/7b6fRz70P9jYSR+v2CTjzrX9rnCdbGdTrU46jp38roxO99+IzfzKwwDn4zs8I4+M3MCuPgNzMrTFeCX9LBku6X9KCkU7pRg5lZqToe/JLGAP8KHAJMBqZKmtzpOszMStWNM/79gAcj4qGIeA74NnBkF+owMyuSIqKzM5T+Ajg4Io7P7UcD+0fEx1uGmw5Mz627AvcPc5YTgSeHOe5o4XXgdVD68kOZ6+CVEdHT2nGt/QBXRFwIXLim05E0NyKmjEBJ6yyvA6+D0pcfvA6qunGrZwGwfaV9u9zNzMw6oBvB/zNgZ0mvkrQB8D5gVhfqMDMrUsdv9UTEKkkfB/4DGANcHBH3NDjLNb5dNAp4HXgdlL784HWwWsff3DUzs+7yJ3fNzArj4DczK8yoCf7BvgZC0oaSrsj9b5PU24UyG1VjHZwk6V5Jd0m6SdIru1FnU+p+FYik90gKSaPu0b4660DSe/N+cI+kb3W6xqbVOA52kHSzpF/kY+HQbtTZVRGxzv+R3iT+DbAjsAHwS2ByyzAfBc7Pze8Druh23V1YB28FNsnNHxlN66DO8ufhxgNzgFuBKd2uuwv7wM7AL4DNc/tW3a67C+vgQuAjuXkyMK/bdXf6b7Sc8df5GogjgZm5+WrgQEnqYI1NG3QdRMTNEfH73Hor6TMUo0XdrwL5DPAF4A+dLK5D6qyDE4B/jYjfAUTE4g7X2LQ66yCATXPzZsDCDta3Vhgtwb8t8FilfX7u1u8wEbEKeBrYsiPVdUaddVB1HPCDRivqrEGXX9I+wPYRsWa/qbn2qrMP7ALsIumnkm6VdHDHquuMOuvgDGCapPnA9cDfdKa0tcda+5UN1hxJ04ApwFu6XUunSFoPOAc4tsuldNtY0u2eA0hXfHMk7RERS7tZVIdNBWZExNmS/hS4VNJrI+KFbhfWKaPljL/O10CsHkbSWNIl3lMdqa4zan0VhqSDgE8BR0TEsx2qrRMGW/7xwGuB2ZLmAa8HZo2yN3jr7APzgVkR8ceIeBj4b9ILwWhRZx0cB1wJEBG3ABuRvsCtGKMl+Ot8DcQs4Jjc/BfAjyK/uzNKDLoOJO0NXEAK/dF2b3fA5Y+IpyNiYkT0RkQv6T2OIyJibnfKbUSd4+Ba0tk+kiaSbv081MEam1ZnHTwKHAgg6TWk4F/S0Sq7bFQEf75n3/c1EPcBV0bEPZLOlHREHuwiYEtJDwInAaPql79qroMvAuOAqyTdKWnUfEdSzeUf1Wqug/8AnpJ0L3Az8HcRMWqufGuug5OBEyT9ErgcOHaUnQQOyl/ZYGZWmFFxxm9mZvU5+M3MCuPgNzMrjIPfzKwwDn4zs8I4+M3MCuPgNzMrzP8A7gFLt0lbV+MAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Histogram of the per-property violation ratios in invConstDF3 (inverse constraint, 'normal' group).\n", "invConstDF3.sort_values(by=['violation_ratio'],ascending=False)['violation_ratio'].plot.hist(bins=100).set_title(\"Inverse Normal Constraint - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 123, "id": "infectious-clothing", "metadata": {}, "outputs": [], "source": [ "# Write the three inverse-constraint result frames to a single CSV.\n", "pd.concat([invConstDF1, invConstDF2, invConstDF3]).to_csv('../../allConstraintsAnalysis_Final/invConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "working-stable", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 138, "id": "saved-twelve", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cbac40ccbf5c44a9963ead4fa2f371ac", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/52 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "# NOTE(review): the title says 'symmetric' although this cell follows the inverse-constraint cells - confirm which run `times` comes from.\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for symmetric constraint checks\")" ] }, { "cell_type": "markdown", "id": "mature-suite", "metadata": {}, "source": [ "# Combine Plots for constraints" ] }, { "cell_type": "code", "execution_count": null, "id": "behind-nurse", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "typeConstDF = pd.read_csv(\"../../allConstraintsAnalysis_Final/typeConstDFAnalysis.csv\")\n", "typeConstDF = typeConstDF.set_index(typeConstDF.iloc[:, 0])\n", "\n", "valTypeConstDF = pd.read_csv(\"../../allConstraintsAnalysis_Final/valueTypeConstDFAnalysis.csv\")\n", "valTypeConstDF = valTypeConstDF.set_index(valTypeConstDF.iloc[:, 0])\n", "\n", "codepConstDF1 = pd.read_csv(\"../../allConstraintsAnalysis_Final/codepConstDFAnalysis.csv\")\n", "codepConstDF1 = 
codepConstDF1.set_index(codepConstDF1.iloc[:, 0])\n", "\n", "symmConstDF = pd.read_csv(\"../../allConstraintsAnalysis_Final/symmConstDFAnalysis.csv\")\n", "symmConstDF = symmConstDF.set_index(symmConstDF.iloc[:, 0])\n", "\n", "invConstDF = pd.read_csv(\"../../allConstraintsAnalysis_Final/invConstDFAnalysis.csv\")\n", "invConstDF = invConstDF.set_index(invConstDF.iloc[:, 0])" ] }, { "cell_type": "code", "execution_count": null, "id": "ultimate-chorus", "metadata": {}, "outputs": [], "source": [ "# Keep the Series name (no bare .rename()) so the column can be selected by name below, like the other constraints.\n", "typeConstDF1 = typeConstDF.add_suffix(\"_type_const\")['violation_ratio_type_const'].sort_values()" ] }, { "cell_type": "code", "execution_count": null, "id": "dynamic-castle", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF1 = valTypeConstDF.add_suffix(\"_valuetype_const\")['violation_ratio_valuetype_const'].sort_values()" ] }, { "cell_type": "code", "execution_count": null, "id": "global-performer", "metadata": {}, "outputs": [], "source": [ "codepConstDF1_1 = codepConstDF1.add_suffix(\"_codep_const\")['violation_ratio_codep_const'].sort_values()" ] }, { "cell_type": "code", "execution_count": null, "id": "enabling-instrumentation", "metadata": {}, "outputs": [], "source": [ "symmConstDF1_1 = symmConstDF.add_suffix(\"_symm_const\")['violation_ratio_symm_const'].sort_values()" ] }, { "cell_type": "code", "execution_count": null, "id": "realistic-cannon", "metadata": {}, "outputs": [], "source": [ "invConstDF1_1 = invConstDF.add_suffix(\"_inv_const\")['violation_ratio_inv_const'].sort_values()" ] }, { "cell_type": "code", "execution_count": null, "id": "about-coalition", "metadata": {}, "outputs": [], "source": [ "# Replace the property index with each row's rank expressed as a share of all properties (in %).\n", "typeConstDF1.index.names = ['property']\n", "typeConstDF1 = typeConstDF1.reset_index().reset_index()\n", "typeConstDF1['index'] = typeConstDF1['index'].apply(lambda p: (p+1) * 100/len(typeConstDF1))\n", "typeConstDF1 = typeConstDF1.set_index('index')['violation_ratio_type_const']" ] }, { "cell_type": "code", "execution_count": null, "id": "hidden-anaheim", "metadata": {}, 
"outputs": [], "source": [ "valTypeConstDF1.index.names = ['property']\n", "valTypeConstDF1 = valTypeConstDF1.reset_index().reset_index()\n", "valTypeConstDF1['index'] = valTypeConstDF1['index'].apply(lambda p: (p+1) * 100/len(valTypeConstDF1))\n", "valTypeConstDF1 = valTypeConstDF1.set_index('index')['violation_ratio_valuetype_const']" ] }, { "cell_type": "code", "execution_count": null, "id": "toxic-straight", "metadata": {}, "outputs": [], "source": [ "codepConstDF1_1.index.names = ['property']\n", "codepConstDF1_1 = codepConstDF1_1.reset_index().reset_index()\n", "codepConstDF1_1['index'] = codepConstDF1_1['index'].apply(lambda p: (p+1) * 100/len(codepConstDF1_1))\n", "codepConstDF1_1 = codepConstDF1_1.set_index('index')['violation_ratio_codep_const']" ] }, { "cell_type": "code", "execution_count": null, "id": "legal-socket", "metadata": {}, "outputs": [], "source": [ "symmConstDF1_1.index.names = ['property']\n", "symmConstDF1_1 = symmConstDF1_1.reset_index().reset_index()\n", "symmConstDF1_1['index'] = symmConstDF1_1['index'].apply(lambda p: (p+1) * 100/len(symmConstDF1_1))\n", "symmConstDF1_1 = symmConstDF1_1.set_index('index')['violation_ratio_symm_const']" ] }, { "cell_type": "code", "execution_count": null, "id": "dietary-attack", "metadata": {}, "outputs": [], "source": [ "invConstDF1_1.index.names = ['property']\n", "invConstDF1_1 = invConstDF1_1.reset_index().reset_index()\n", "invConstDF1_1['index'] = invConstDF1_1['index'].apply(lambda p: (p+1) * 100/len(invConstDF1_1))\n", "invConstDF1_1 = invConstDF1_1.set_index('index')['violation_ratio_inv_const']" ] }, { "cell_type": "code", "execution_count": null, "id": "familiar-packing", "metadata": {}, "outputs": [], "source": [ "# Percentile curve per constraint: the 1st..100th percentile of the violation ratios, scaled to %.\n", "# np.percentile only looks at the values, so the percentage index built above does not change these curves.\n", "typeConstDF2 = [np.percentile(typeConstDF1,i)*100 for i in range(1, 101)]\n", "valTypeConstDF2 = [np.percentile(valTypeConstDF1,i)*100 for i in range(1, 101)]\n", "codepConstDF1_2 = [np.percentile(codepConstDF1_1,i)*100 for i in range(1, 101)]\n", "symmConstDF1_2 = 
[np.percentile(symmConstDF1_1,i)*100 for i in range(1, 101)]\n", "invConstDF1_2 = [np.percentile(invConstDF1_1,i)*100 for i in range(1, 101)]" ] }, { "cell_type": "code", "execution_count": null, "id": "forward-tutorial", "metadata": {}, "outputs": [], "source": [ "# One column per constraint type, one row per percentile (1..100).\n", "constAnalysisDF = pd.DataFrame({'index':list(range(1, 101)), 'type': typeConstDF2, 'value type': valTypeConstDF2, 'irs': codepConstDF1_2, 'symmetric': symmConstDF1_2, 'inverse': invConstDF1_2})" ] }, { "cell_type": "code", "execution_count": null, "id": "mobile-rings", "metadata": {}, "outputs": [], "source": [ "# Long format (index, constraint, VR) so seaborn can colour the curves by constraint.\n", "constAnalysisDF = constAnalysisDF.melt('index', var_name='constraint', value_name='VR')" ] }, { "cell_type": "code", "execution_count": null, "id": "contrary-conviction", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF.head()" ] }, { "cell_type": "code", "execution_count": 33, "id": "ruled-rough", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Text(0.5, 0, 'Proportion of properties (in %)'),\n", " Text(0, 0.5, 'Violation Ratio (in %)')]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# One percentile curve per constraint type, coloured by constraint.\n", "plt.figure(figsize=(10, 6))\n", "ax = sns.lineplot(data=constAnalysisDF, x='index', y='VR', hue='constraint')\n", "ax.set(xlabel=\"Proportion of properties (in %)\", ylabel=\"Violation Ratio (in %)\")" ] }, { "cell_type": "markdown", "id": "amazing-growing", "metadata": {}, "source": [ "## Scatterplot" ] }, { "cell_type": "code", "execution_count": 7, "id": "federal-diversity", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "\n", "def _read_indexed(csv_path):\n", "    \"\"\"Load an analysis CSV and index it by its first column (the column itself is kept).\"\"\"\n", "    frame = pd.read_csv(csv_path)\n", "    return frame.set_index(frame.iloc[:, 0])\n", "\n", "\n", "# Reload the per-constraint violation tables saved by the analysis sections.\n", "typeConstDF = _read_indexed(\"../../allConstraintsAnalysis_Final/typeConstDFAnalysis.csv\")\n", "valTypeConstDF = _read_indexed(\"../../allConstraintsAnalysis_Final/valueTypeConstDFAnalysis.csv\")\n", "codepConstDF1 = _read_indexed(\"../../allConstraintsAnalysis_Final/codepConstDFAnalysis.csv\")\n", "symmConstDF = _read_indexed(\"../../allConstraintsAnalysis_Final/symmConstDFAnalysis.csv\")\n", "invConstDF = _read_indexed(\"../../allConstraintsAnalysis_Final/invConstDFAnalysis.csv\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "polish-supplier", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
violation_ratio_inv_const
Unnamed: 0
P16050.940299
P34480.879769
P9260.857143
P9250.833333
P10290.826516
\n", "
" ], "text/plain": [ " violation_ratio_inv_const\n", "Unnamed: 0 \n", "P1605 0.940299\n", "P3448 0.879769\n", "P926 0.857143\n", "P925 0.833333\n", "P1029 0.826516" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One single-column frame of violation ratios per constraint type, highest ratio first.\n", "typeConstDF1 = typeConstDF.add_suffix(\"_type_const\")[['violation_ratio_type_const']].sort_values(by=['violation_ratio_type_const'],ascending=False)\n", "valTypeConstDF1 = valTypeConstDF.add_suffix(\"_valuetype_const\")[['violation_ratio_valuetype_const']].sort_values(by=['violation_ratio_valuetype_const'],ascending=False)\n", "codepConstDF1_1 = codepConstDF1.add_suffix(\"_codep_const\")[['violation_ratio_codep_const']].sort_values(by=['violation_ratio_codep_const'],ascending=False)\n", "symmConstDF1_1 = symmConstDF.add_suffix(\"_symm_const\")[['violation_ratio_symm_const']].sort_values(by=['violation_ratio_symm_const'],ascending=False)\n", "invConstDF1_1 = invConstDF.add_suffix(\"_inv_const\")[['violation_ratio_inv_const']].sort_values(by=['violation_ratio_inv_const'],ascending=False)\n", "invConstDF1_1.head()" ] }, { "cell_type": "code", "execution_count": 9, "id": "sexual-giant", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1456, 897, 527, 38, 110)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(typeConstDF1), len(valTypeConstDF1), len(codepConstDF1_1), len(symmConstDF1_1), len(invConstDF1_1)" ] }, { "cell_type": "code", "execution_count": 10, "id": "photographic-signature", "metadata": {}, "outputs": [], "source": [ "# Drop the property labels; only the ratio values are needed for the percentile curves.\n", "typeConstDF1.index.names = ['property']\n", "typeConstDF1 = typeConstDF1.reset_index()[['violation_ratio_type_const']]\n", "valTypeConstDF1.index.names = ['property']\n", "valTypeConstDF1 = valTypeConstDF1.reset_index()[['violation_ratio_valuetype_const']]\n", "codepConstDF1_1.index.names = ['property']\n", "codepConstDF1_1 = codepConstDF1_1.reset_index()[['violation_ratio_codep_const']]\n", "symmConstDF1_1.index.names = 
['property']\n", "symmConstDF1_1 = symmConstDF1_1.reset_index()[['violation_ratio_symm_const']]\n", "invConstDF1_1.index.names = ['property']\n", "invConstDF1_1 = invConstDF1_1.reset_index()[['violation_ratio_inv_const']]\n", "\n", "import numpy as np\n", "\n", "\n", "def percentile_frame(ratios, label, step):\n", "    \"\"\"Sample the percentiles of `ratios` every `step` points into a one-column frame.\n", "\n", "    Percentiles 1, 1 + step, ... (below 100) are scaled to percent and stored in column\n", "    `label`. The frame is indexed by 100, 100 - step, ... (above 1), so the lowest\n", "    percentile sits at index 100 and the curve reads from high to low ratios.\n", "    \"\"\"\n", "    ranks = list(np.arange(100, 1, -step))\n", "    values = [np.percentile(ratios, q) * 100 for q in np.arange(1, 100, step)]\n", "    return pd.DataFrame({'index': ranks, label: values}).set_index('index')\n", "\n", "\n", "# NOTE(review): the steps look chosen so that the number of sampled points roughly tracks the\n", "# number of properties per constraint (1456, 897, 527, 38, 110 in the recorded run) - confirm.\n", "typeConstDF2 = percentile_frame(typeConstDF1, 'type', 0.05)\n", "valTypeConstDF2 = percentile_frame(valTypeConstDF1, 'value type', 0.1)\n", "codepConstDF1_2 = percentile_frame(codepConstDF1_1, 'irs', 0.5)\n", "symmConstDF1_2 = percentile_frame(symmConstDF1_1, 'symmetric', 5)\n", "invConstDF1_2 = percentile_frame(invConstDF1_1, 'inverse', 1)" ] }, { "cell_type": "code", "execution_count": 11, "id": "unique-marijuana", "metadata": {}, "outputs": [], "source": [ "# Outer-join on the shared rank index; constraints sampled on a coarser grid get NaN at the finer ranks.\n", "constAnalysisDF = typeConstDF2.join(valTypeConstDF2, how='outer').join(codepConstDF1_2, how='outer').join(symmConstDF1_2, how='outer').join(invConstDF1_2, how='outer')\n", "constAnalysisDF = constAnalysisDF.reset_index()" ] }, { "cell_type": "code", "execution_count": 12, "id": "social-pipeline", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF = constAnalysisDF.melt('index', var_name='constraint', value_name='VR')" ] }, { "cell_type": "code", "execution_count": 13, "id": "institutional-imaging", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexconstraintVR
01.05type100.0
11.10type100.0
21.15type100.0
31.20type100.0
41.25type100.0
\n", "
" ], "text/plain": [ " index constraint VR\n", "0 1.05 type 100.0\n", "1 1.10 type 100.0\n", "2 1.15 type 100.0\n", "3 1.20 type 100.0\n", "4 1.25 type 100.0" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "constAnalysisDF.head()" ] }, { "cell_type": "code", "execution_count": 16, "id": "straight-purpose", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Text(0.5, 0, 'Properties'),\n", " Text(0, 0.5, 'Violation Ratio (in %)'),\n", " [Text(-20.0, 0, ''),\n", " Text(0.0, 0, ''),\n", " Text(20.0, 0, ''),\n", " Text(40.0, 0, ''),\n", " Text(60.0, 0, ''),\n", " Text(80.0, 0, ''),\n", " Text(100.0, 0, ''),\n", " Text(120.0, 0, '')]]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Scatter of the sampled percentile points, one colour per constraint type; x tick labels are hidden.\n", "plt.figure(figsize=(15, 10))\n", "sns.set(font_scale=2)\n", "ax = sns.scatterplot(x='index',y='VR',hue='constraint',data=constAnalysisDF)\n", "ax.set(xlabel=\"Properties\", ylabel = \"Violation Ratio (in %)\",xticklabels=[])" ] }, { "cell_type": "markdown", "id": "stuck-criticism", "metadata": {}, "source": [ "# Analysis on properties with constraints" ] }, { "cell_type": "code", "execution_count": 26, "id": "driven-reference", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-03 09:14:12 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " PARAS: ['P2302']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->()\" \\\n", " -o ../../constraintsOP/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 39, "id": "exciting-focus", "metadata": {}, "outputs": [], "source": [ "!kgtk unique -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz --column node1 -o ../../constraintsOP/claims.propList.tsv" ] }, { "cell_type": "code", "execution_count": 42, "id": "flush-romania", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "node1\tlabel\tnode2\r\n", "P10\tcount\t17\r\n", "P1000\tcount\t10\r\n", "P1001\tcount\t26\r\n", "P1002\tcount\t9\r\n", "P1003\tcount\t20\r\n", "P1004\tcount\t33\r\n", "P1005\tcount\t21\r\n", 
"P1006\tcount\t26\r\n", "P1007\tcount\t19\r\n" ] } ], "source": [ "!head ../../constraintsOP/claims.propList.tsv" ] }, { "cell_type": "code", "execution_count": 43, "id": "chemical-harris", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "props = pd.read_csv(\"../../constraintsOP/claims.constraints_list.tsv\", sep='\\t')" ] }, { "cell_type": "code", "execution_count": 44, "id": "higher-underground", "metadata": {}, "outputs": [], "source": [ "props2 = props.groupby(['node1']).node2.apply(list)" ] }, { "cell_type": "code", "execution_count": 45, "id": "light-appreciation", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8100" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(props2)" ] }, { "cell_type": "code", "execution_count": 48, "id": "yellow-helmet", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2336, 8100)" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Of the properties in the constraints list, count those with a per-property claims file on disk.\n", "totalCnt = len(props2.index)\n", "cnt = sum(\n", "    os.path.isfile(\"../../propertiesSplit_Final/claims.\" + prop + \".tsv\")\n", "    for prop in props2.index\n", ")\n", "cnt, totalCnt" ] }, { "cell_type": "code", "execution_count": 50, "id": "detected-skiing", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "node1\n", "P10 [Q21502404, Q21510851, Q21510852, Q52004125, Q...\n", "P1000 [Q21510856, Q21510865, Q53869507]\n", "P1001 [Q21502838, Q21503250, Q21510865, Q25796498]\n", "P1002 [Q21503250, Q21510865]\n", "P1003 [Q19474404, Q21502404, Q21502410, Q21510851, Q...\n", " ... 
\n", "P1563 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1564 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1565 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1566 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "P1567 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "Name: node2, Length: 500, dtype: object" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "props2.head(500)" ] }, { "cell_type": "code", "execution_count": 32, "id": "processed-perfume", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "props2 = pd.read_csv(\"../../constraintsOP/claims.propList.tsv\", sep='\\t')" ] }, { "cell_type": "code", "execution_count": 33, "id": "increasing-graphics", "metadata": {}, "outputs": [], "source": [ "props2 = props2.groupby(['node1']).node2.apply(list)" ] }, { "cell_type": "code", "execution_count": 34, "id": "posted-ukraine", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8193" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(props2)" ] }, { "cell_type": "code", "execution_count": 35, "id": "fifth-provision", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2415, 8193)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Of the properties listed in claims.propList.tsv, count those with a per-property claims file on disk.\n", "totalCnt = len(props2.index)\n", "cnt = sum(\n", "    os.path.isfile(\"../../propertiesSplit_Final/claims.\" + prop + \".tsv\")\n", "    for prop in props2.index\n", ")\n", "cnt, totalCnt" ] }, { "cell_type": "code", "execution_count": 50, "id": "married-heating", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "node1\n", "P10 [Q21502404, Q21510851, Q21510852, Q52004125, Q...\n", "P1000 [Q21510856, Q21510865, Q53869507]\n", "P1001 [Q21502838, Q21503250, Q21510865, Q25796498]\n", "P1002 [Q21503250, Q21510865]\n", "P1003 [Q19474404, Q21502404, Q21502410, 
Q21510851, Q...\n", " ... \n", "P1563 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1564 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1565 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1566 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "P1567 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "Name: node2, Length: 500, dtype: object" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "props2.head(500)" ] }, { "cell_type": "code", "execution_count": null, "id": "magnetic-conditions", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "kgtkEnv", "language": "python", "name": "kgtkenv" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "318px" }, "toc_section_display": true, "toc_window_display": true }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "oldHeight": 122, "position": { "height": "40px", "left": "1170px", "right": "20px", "top": "120px", "width": "250px" }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "varInspector_section_display": "none", "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }