{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "juvenile-ability", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8f09d8d199d445fbb9e4ed86e3bb148e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1149471184 [00:00(node2), c: (rLabel)-[:P2308]->(parent), d: (node1)-[]->(par), c: (eLabel)-[:P2303]->(eNode)\" \\\n", " --where 'nodeProp.label = rLabel and (par = parent or (rLabel = eLabel and node1 = eNode))' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2, max(parent) as `node1;ancestor`' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db ; \\\n", " kgtk --debug ifnotexists -i ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.all.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 4, "id": "abstract-retreat", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-03-12 22:33:26 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT graph_11_c1.\"id\", graph_11_c1.\"node1\", graph_11_c1.\"label\", graph_11_c1.\"node2\"\r\n", " FROM graph_11 AS graph_11_c1, graph_14 AS graph_14_c2\r\n", " WHERE graph_11_c1.\"node1\"=graph_14_c2.\"node1\"\r\n", " AND (graph_14_c2.\"node2\" IN (?, ?, ?))\r\n", " PARAS: ['Q1238720', 'Q3331189', 'Q47461344']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../propertiesSplit/claims.P996.tsv \\\n", " ../../wikidata-20210215/derived.isastar.tsv.gz \\\n", " --match \"m: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)\" \\\n", " --where 'par in [\"Q1238720\",\"Q3331189\",\"Q47461344\"]' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db ; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.P996.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 7, "id": "strange-truck", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "81289 ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv\r\n" ] } ], "source": [ "!wc -l ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv" ] }, { "cell_type": "code", "execution_count": 8, "id": "finnish-hampton", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-03-12 22:49:54 sqlstore]: IMPORT graph directly into table graph_15 from /data/wd-correctness/propertiesSplit/claims.P991.tsv ...\n", "[2021-03-12 22:49:54 query]: SQL Translation:\n", "---------------------------------------------\n", " SELECT graph_15_c1.\"id\", graph_15_c1.\"node1\", graph_15_c1.\"label\", graph_15_c1.\"node2\"\n", " FROM graph_15 AS graph_15_c1, graph_5 AS graph_5_c2\n", " WHERE graph_15_c1.\"node1\"=graph_5_c2.\"node1\"\n", " AND (graph_5_c2.\"node2\" IN (?))\n", " PARAS: ['Q40231']\n", "---------------------------------------------\n", "[2021-03-12 22:49:55 sqlstore]: CREATE INDEX on table graph_15 column node1 ...\n", "[2021-03-12 22:49:55 sqlstore]: ANALYZE INDEX on table graph_15 column node1 ...\n" ] } ], "source": [ "!kgtk --debug query -i ../../propertiesSplit/claims.P991.tsv \\\n", " ../../wikidata-20210215/derived.P31P279star.tsv.gz \\\n", " --match \"m: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)\" \\\n", " --where 'par in [\"Q40231\"]' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P991.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db ; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.P991.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P991.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P991.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 9, "id": "elegant-reverse", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-03-12 23:54:56 sqlstore]: IMPORT graph directly into table graph_16 from /data/wd-correctness/propertiesSplit/claims.P965.tsv ...\n", "[2021-03-12 23:54:56 query]: SQL Translation:\n", "---------------------------------------------\n", " SELECT graph_16_c1.\"id\", graph_5_c2.\"node1\", graph_16_c1.\"label\", graph_16_c1.\"node2\"\n", " FROM graph_16 AS graph_16_c1, graph_5 AS graph_5_c2\n", " WHERE graph_16_c1.\"node1\"=graph_5_c2.\"node1\"\n", " AND (graph_5_c2.\"node2\" IN (?))\n", " PARAS: ['Q6023295']\n", "---------------------------------------------\n", "[2021-03-12 23:54:56 sqlstore]: CREATE INDEX on table graph_16 column node1 ...\n", "[2021-03-12 23:54:56 sqlstore]: ANALYZE INDEX on table graph_16 column node1 ...\n" ] } ], "source": [ "!kgtk --debug query -i ../../propertiesSplit/claims.P965.tsv \\\n", " ../../wikidata-20210215/derived.P31P279star.tsv.gz \\\n", " --match \"m: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)\" \\\n", " --where 'par in [\"Q6023295\"]' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P965.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db ; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.P965.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P965.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P965.incorrect.tsv" ] }, { "cell_type": "markdown", "id": "matched-strength", "metadata": {}, "source": [ "# Generate Queries" ] }, { "cell_type": "markdown", "id": "black-insured", "metadata": {}, "source": [ "## Type Constraint" ] }, { "cell_type": "markdown", "id": "interior-humor", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 1, "id": "clinical-brunei", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.read_csv('../../constraintsOP/typeConstraint/claims.type-constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 2, "id": "assured-cleaners", "metadata": {}, "outputs": [], "source": [ "df1 = df.groupby(['node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 3, "id": "sharing-evolution", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
0P1001P2308[Q102496, Q105985, Q1140371, Q1151067, Q119768...
1P1001P2309[Q30208840]
2P1002P2308[Q630010]
3P1002P2309[Q21514624]
4P1004P2308[Q2221906, Q23413, Q3947, Q41176, Q88291]
\n", "
" ], "text/plain": [ " node1 label node2\n", "0 P1001 P2308 [Q102496, Q105985, Q1140371, Q1151067, Q119768...\n", "1 P1001 P2309 [Q30208840]\n", "2 P1002 P2308 [Q630010]\n", "3 P1002 P2309 [Q21514624]\n", "4 P1004 P2308 [Q2221906, Q23413, Q3947, Q41176, Q88291]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.head()" ] }, { "cell_type": "code", "execution_count": 4, "id": "still-trail", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
9318P8138P2308[Q27096213]
9319P8138P2309[Q21514624]
\n", "
" ], "text/plain": [ " node1 label node2\n", "9318 P8138 P2308 [Q27096213]\n", "9319 P8138 P2309 [Q21514624]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1[df1['node1'] == 'P8138']" ] }, { "cell_type": "markdown", "id": "solid-browser", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 8, "id": "bright-impossible", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "95b7843f9024492698ef1b367b6db289", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4810 [00:00\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Something failed for prop:\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mprop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mfOP\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0mfOP\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'fOP' is not defined" ] } ], "source": [ "from tqdm.notebook import tqdm\n", "import os.path\n", "import os\n", "\n", "cnt = 0\n", "fCnt = 1\n", "for prop in tqdm(df1.node1.unique()):\n", " try:\n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop +\".tsv\")):\n", " continue\n", " relation = df1[(df1['node1'] == prop) & (df1['label'] == 'P2309')].node2.values[0][0]\n", " type1 = df1[(df1['node1'] == prop) & (df1['label'] == 'P2316')].node2.values\n", "# print(type1)\n", "\n", " parents = df1[(df1['node1'] == prop) & (df1['label'] == 'P2308')].node2.values[0]\n", " exceptions = df1[(df1['node1'] == prop) & (df1['label'] == 'P2303')].node2.values\n", " \n", " if relation == \"Q21503252\":\n", " parentFile = \"P31\"\n", " parentTitle = 'instanceOf'\n", " elif relation == \"Q21514624\":\n", " parentFile = \"P279\"\n", " parentTitle = 'subclass'\n", " else:\n", " parentFile = \"isa\"\n", " parentTitle = 'instanceOfOrSubclass'\n", "\n", " if len(type1) != 0 and type1[0][0] == \"Q21502408\":\n", " typeVal = \"mandatory\"\n", " elif len(type1) != 0 and type1[0][0] == \"Q62026391\":\n", " typeVal = \"suggestion\"\n", " else:\n", " typeVal = \"normal\"\n", "\n", " if len(exceptions):\n", " exceptionPart = \"or node1 in \" + str(exceptions[0]).replace(\"'\",'\"')\n", " else:\n", " exceptionPart = \"\"\n", " \n", " folderName = 'typeConstraint_Final'\n", " shellFileSuffix = 'typeConstraintValidator'\n", " \n", " if cnt % 120 == 0:\n", " if fOP:\n", " fOP.close()\n", " fOP = open(\"../../propertiesSplit/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " fCnt += 1\n", " \n", " fOP.write(\"{ time (kgtk --debug query -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " ../../wikidata-20210215/derived.\" + parentFile + \".tsv.gz \\\n", " ../../wikidata-20210215/derived.P279star.tsv.gz \\\n", " --match 'm: (node1)-[nodeProp]->(node2), \" + parentFile + \": (node1)-[]->(nodex), P279star: (nodex)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/const110_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\")\n", "\n", " cnt += 1\n", " except:\n", " print(\"Something failed for prop:\",prop)\n", "\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 9, "id": "electrical-agreement", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1456" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 72, "id": "outside-stupid", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,14):\n", "# os.system(\"screen -dm sh ../../propertiesSplit/checkViolations/typeConstraintValidator\"+str(i)+\".sh\")\n", " " ] }, { "cell_type": "markdown", "id": "competitive-canvas", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 33, "id": "casual-perth", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f14fb41a11474c79a4e03c391151de92", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1c8272e68ff941bb8d9453f8de872e8a", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/330 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P7424890389[../../allConstraintsAnalysis/typeConstraint_F...0.001817
P26635547[../../allConstraintsAnalysis/typeConstraint_F...0.012478
P5105191995[../../allConstraintsAnalysis/typeConstraint_F...0.047170
P693832[../../allConstraintsAnalysis/typeConstraint_F...0.400000
P3179505213[../../allConstraintsAnalysis/typeConstraint_F...0.002567
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P742 48903 89 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P2663 554 7 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P5105 1919 95 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P6938 3 2 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P3179 5052 13 [../../allConstraintsAnalysis/typeConstraint_F... \n", "\n", " violation_ratio \n", "P742 0.001817 \n", "P2663 0.012478 \n", "P5105 0.047170 \n", "P6938 0.400000 \n", "P3179 0.002567 " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF.head()" ] }, { "cell_type": "code", "execution_count": 39, "id": "competitive-peeing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P81380461[../../allConstraintsAnalysis/typeConstraint_F...1.0
P5051064[../../allConstraintsAnalysis/typeConstraint_F...1.0
P2303039[../../allConstraintsAnalysis/typeConstraint_F...1.0
P1227019[../../allConstraintsAnalysis/typeConstraint_F...1.0
P2308017[../../allConstraintsAnalysis/typeConstraint_F...1.0
P6001016[../../allConstraintsAnalysis/typeConstraint_F...1.0
P8738014[../../allConstraintsAnalysis/typeConstraint_F...1.0
P538010[../../allConstraintsAnalysis/typeConstraint_F...1.0
P800406[../../allConstraintsAnalysis/typeConstraint_F...1.0
P558904[../../allConstraintsAnalysis/typeConstraint_F...1.0
P651004[../../allConstraintsAnalysis/typeConstraint_F...1.0
P717403[../../allConstraintsAnalysis/typeConstraint_F...1.0
P601403[../../allConstraintsAnalysis/typeConstraint_F...1.0
P81703[../../allConstraintsAnalysis/typeConstraint_F...1.0
P488203[../../allConstraintsAnalysis/typeConstraint_F...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P8138 0 461 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P5051 0 64 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P2303 0 39 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P1227 0 19 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P2308 0 17 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P6001 0 16 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P8738 0 14 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P538 0 10 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P8004 0 6 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P5589 0 4 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P6510 0 4 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P7174 0 3 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P6014 0 3 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P817 0 3 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P4882 0 3 [../../allConstraintsAnalysis/typeConstraint_F... \n", "\n", " violation_ratio \n", "P8138 1.0 \n", "P5051 1.0 \n", "P2303 1.0 \n", "P1227 1.0 \n", "P2308 1.0 \n", "P6001 1.0 \n", "P8738 1.0 \n", "P538 1.0 \n", "P8004 1.0 \n", "P5589 1.0 \n", "P6510 1.0 \n", "P7174 1.0 \n", "P6014 1.0 \n", "P817 1.0 \n", "P4882 1.0 " ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF.sort_values(by=['violation_ratio','incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 40, "id": "backed-corruption", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../../allConstraintsAnalysis/typeConstraint_Final/normal/claims.type-constraints.instanceOf.P4945.correct.tsv',\n", " '../../allConstraintsAnalysis/typeConstraint_Final/normal/claims.type-constraints.instanceOf.P4945.incorrect.tsv']" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(typeConstDF.loc['P4945'].paths)" ] }, { "cell_type": "code", "execution_count": 1, "id": "interracial-fraud", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "Q10369487-P5051-Q4845565-75168cbb-0\tQ10369487\tP5051\tQ4845565\tnormal\twikibase-item\r\n", "Q12264664-P5051-Q1110918-cb778987-0\tQ12264664\tP5051\tQ1110918\tnormal\twikibase-item\r\n", "Q12264664-P5051-Q3394058-ac8798ff-0\tQ12264664\tP5051\tQ3394058\tnormal\twikibase-item\r\n", "Q12264664-P5051-Q801660-71c68e08-0\tQ12264664\tP5051\tQ801660\tnormal\twikibase-item\r\n", "Q1457751-P5051-Q6029401-3c0f721e-0\tQ1457751\tP5051\tQ6029401\tnormal\twikibase-item\r\n", "Q1457751-P5051-Q7643664-20983ead-0\tQ1457751\tP5051\tQ7643664\tnormal\twikibase-item\r\n", "Q1631914-P5051-Q3555260-04b1b724-0\tQ1631914\tP5051\tQ3555260\tnormal\twikibase-item\r\n", "Q2512768-P5051-Q493430-91955413-0\tQ2512768\tP5051\tQ493430\tnormal\twikibase-item\r\n", "Q2865993-P5051-Q9606-9312fd91-0\tQ2865993\tP5051\tQ9606\tnormal\twikibase-item\r\n" ] } ], "source": [ "!head ../../allConstraintsAnalysis/typeConstraint_Final/normal/claims.type-constraints.instanceOf.P5051.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 43, "id": "clinical-lawsuit", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 1456.000000\n", "mean 0.095964\n", "std 0.221602\n", "min 0.000000\n", "25% 0.000594\n", "50% 0.008160\n", "75% 0.054471\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 44, "id": "wanted-domestic", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Type Constraint Violation Ratios')" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "typeConstDF['violation_ratio'].plot.hist(bins=100).set_title(\"Type Constraint Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 45, "id": "sufficient-hollywood", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Type Constraint Violation Ratios (<=0.05)')" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "typeConstDF[typeConstDF['violation_ratio'] <= 0.05].violation_ratio.plot.hist(bins=100).set_title(\"Type Constraint Violation Ratios (<=0.05)\")" ] }, { "cell_type": "code", "execution_count": 46, "id": "minor-marshall", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of constraints whose violation ratio is greater than mean :0/1456\n" ] } ], "source": [ "print(f\"No. of constraints whose violation ratio is greater than mean :{sum(typeConstDF['violation_ratio'] >= 5.286054)}/{len(typeConstDF)}\")" ] }, { "cell_type": "code", "execution_count": 47, "id": "special-consensus", "metadata": {}, "outputs": [], "source": [ "# typeConstDF.sort_values(by=['incorrect'],ascending=False).head(5).paths.values" ] }, { "cell_type": "code", "execution_count": 48, "id": "excited-person", "metadata": {}, "outputs": [], "source": [ "# !cat ../../allConstraintsAnalysis/typeConstraint/normal/claims.type-constraints.instanceOf.P953.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 49, "id": "revolutionary-violence", "metadata": {}, "outputs": [], "source": [ "for key1 in typeConstViolations.keys():\n", " typeConstViolations[key1]['correct'] = typeConstViolations[key1]['instanceOf']['correct'] + typeConstViolations[key1]['subclass']['correct'] + typeConstViolations[key1]['instanceOfOrSubclass']['correct']\n", " typeConstViolations[key1]['incorrect'] = typeConstViolations[key1]['instanceOf']['incorrect'] + typeConstViolations[key1]['subclass']['incorrect'] + typeConstViolations[key1]['instanceOfOrSubclass']['incorrect']\n", " typeConstViolations[key1]['VR'] = typeConstViolations[key1]['incorrect'] / (typeConstViolations[key1]['correct'] + typeConstViolations[key1]['incorrect'])\n", " " ] }, { "cell_type": "code", "execution_count": 50, "id": "emotional-favorite", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "{'mandatory': {'instanceOf': {'correct': 44768778, 'incorrect': 35027},\n", " 'subclass': {'correct': 1966, 'incorrect': 30},\n", " 'instanceOfOrSubclass': {'correct': 221394, 'incorrect': 204},\n", " 'propCount': 165,\n", " 'correct': 44992138,\n", " 'incorrect': 35261,\n", " 'VR': 0.0007831009736982587},\n", " 'suggestion': {'instanceOf': {'correct': 61486, 'incorrect': 18368},\n", " 'subclass': {'correct': 0, 'incorrect': 0},\n", " 'instanceOfOrSubclass': {'correct': 23314, 'incorrect': 2939},\n", " 'propCount': 11,\n", " 'correct': 84800,\n", " 'incorrect': 21307,\n", " 'VR': 0.200806732826298},\n", " 'normal': {'instanceOf': {'correct': 398100301, 'incorrect': 797888},\n", " 'subclass': {'correct': 96605, 'incorrect': 9442},\n", " 'instanceOfOrSubclass': {'correct': 66358710, 'incorrect': 265583},\n", " 'propCount': 1280,\n", " 'correct': 464555616,\n", " 'incorrect': 1072913,\n", " 'VR': 0.0023042252207016293}}" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstViolations" ] }, { "cell_type": "code", "execution_count": 51, "id": "aggregate-impact", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratiototal
P20931348051576527[../../allConstraintsAnalysis/typeConstraint_F...0.000048134811684
P14763985687647204[../../allConstraintsAnalysis/typeConstraint_F...0.00118339904080
P5773885531433977[../../allConstraintsAnalysis/typeConstraint_F...0.00087438889291
P14333672328312050[../../allConstraintsAnalysis/typeConstraint_F...0.00032836735333
P12153309919524104[../../allConstraintsAnalysis/typeConstraint_F...0.00072833123299
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2093 134805157 6527 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P1476 39856876 47204 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P577 38855314 33977 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P1433 36723283 12050 [../../allConstraintsAnalysis/typeConstraint_F... \n", "P1215 33099195 24104 [../../allConstraintsAnalysis/typeConstraint_F... \n", "\n", " violation_ratio total \n", "P2093 0.000048 134811684 \n", "P1476 0.001183 39904080 \n", "P577 0.000874 38889291 \n", "P1433 0.000328 36735333 \n", "P1215 0.000728 33123299 " ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF['total'] = typeConstDF['correct'] + typeConstDF['incorrect']\n", "typeConstDF.sort_values(by=['total'],ascending=False).head()" ] }, { "cell_type": "code", "execution_count": 52, "id": "baking-garden", "metadata": {}, "outputs": [], "source": [ "typeConstDF.to_csv('../../allConstraintsAnalysis/typeConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "bearing-kruger", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 18, "id": "assumed-toner", "metadata": {}, "outputs": [], "source": [ "# from tqdm.notebook import tqdm\n", "# import os.path\n", "\n", "# cnt = 0\n", "# fCnt = 1\n", "# for prop in tqdm(df1.node1.unique()):\n", "# try:\n", "# if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop +\".tsv\")):\n", "# continue\n", "# relation = df1[(df1['node1'] == prop) & (df1['label'] == 'P2309')].node2.values[0][0]\n", "# type1 = df1[(df1['node1'] == prop) & (df1['label'] == 'P2316')].node2.values\n", "\n", "# parents = df1[(df1['node1'] == prop) & (df1['label'] == 'P2308')].node2.values[0]\n", "# exceptions = df1[(df1['node1'] == prop) & (df1['label'] == 'P2303')].node2.values\n", "\n", "# # print(prop, relation, type1, parents, exceptions)\n", "\n", "# if relation == \"Q21503252\":\n", "# parentFile = \"P31P279star\"\n", "# parentTitle = 'instanceOf'\n", "# elif relation == \"Q21514624\":\n", "# parentFile = \"P279star\"\n", "# parentTitle = 'subclass'\n", "# else:\n", "# parentFile = \"isastar\"\n", "# parentTitle = 'instanceOfOrSubclass'\n", "\n", "# if len(type1) != 0 and type1[0][0] == \"Q21502408\":\n", "# typeVal = \"mandatory\"\n", "# elif len(type1) != 0 and type1[0][0] == \"Q62026391\":\n", "# typeVal = \"suggestion\"\n", "# else:\n", "# typeVal = \"normal\"\n", "\n", "# if len(exceptions):\n", "# exceptionPart = \"or node1 in \" + str(exceptions[0]).replace(\"'\",'\"')\n", "# else:\n", "# exceptionPart = \"\"\n", " \n", "# if cnt % 100 == 0:\n", "# fOP = open(\"../../propertiesSplit/checkViolations/TimedTypeConstraintValidator\" + str(fCnt) + \".sh\",\"w\")\n", "# fCnt += 1\n", " \n", "# fOP.write(\"{ time kgtk --debug query -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", "# ../../wikidata-20210215/derived.\" + parentFile + \".tsv.gz \\\n", "# --match 'm: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)' \\\n", "# --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", "# --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", "# -o ../../allConstraintsAnalysis/TimedTypeConstraint/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", "# --graph-cache ~/sqlite3_caches/const2123_\" + str(fCnt) + \".sqlite3.db; } 2>> ../../propertiesSplit/checkViolations/exec_logs/TimedTypeConstraint_TimedTypeConstraintValidator\" + str(fCnt) + \".txt ; \\\n", "# kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", "# --filter-on ../../allConstraintsAnalysis/TimedTypeConstraint/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", "# --filter-mode NONE \\\n", "# --input-keys node1 label \\\n", "# --filter-keys node1 label \\\n", "# -o ../../allConstraintsAnalysis/TimedTypeConstraint/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv\\n\")\n", "\n", "# cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", " " ] }, { "cell_type": "code", "execution_count": 93, "id": "veterinary-fault", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "52944ea021934d23b3d4ab3fb1f091f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/122 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for type constraint checks\")" ] }, { "cell_type": "markdown", "id": "intense-computer", "metadata": {}, "source": [ "## Value Type Constraint" ] }, { "cell_type": "markdown", "id": "animated-companion", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 36, "id": "static-profit", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "dfValueType = pd.read_csv('../../constraintsOP/valuetypeConstraint/claims.type-constraints_all1.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 37, "id": "worthy-malawi", "metadata": {}, "outputs": [], "source": [ "dfValueType = dfValueType.groupby(['node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 38, "id": "eleven-tiffany", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
0P1000P2308[Q1241356]
1P1000P2309[Q30208840]
2P1001P2308[Q20926517, Q2881272, Q2882257, Q3624078, Q389...
3P1001P2309[Q30208840]
4P1002P2308[Q2576663]
\n", "
" ], "text/plain": [ " node1 label node2\n", "0 P1000 P2308 [Q1241356]\n", "1 P1000 P2309 [Q30208840]\n", "2 P1001 P2308 [Q20926517, Q2881272, Q2882257, Q3624078, Q389...\n", "3 P1001 P2309 [Q30208840]\n", "4 P1002 P2308 [Q2576663]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType.head()" ] }, { "cell_type": "code", "execution_count": 39, "id": "expired-stuff", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2308', 'P2309', 'P2303', 'P2316', 'P6607', 'P2304'], dtype=object)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType['label'].unique()" ] }, { "cell_type": "code", "execution_count": 40, "id": "imposed-newsletter", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [node1, label, node2]\n", "Index: []" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType[dfValueType['label'] == '2316']" ] }, { "cell_type": "code", "execution_count": 41, "id": "answering-alabama", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
330P1659P2308[Q18616576]
331P1659P2309[Q21503252]
332P1659P2316[Q21502408]
\n", "
" ], "text/plain": [ " node1 label node2\n", "330 P1659 P2308 [Q18616576]\n", "331 P1659 P2309 [Q21503252]\n", "332 P1659 P2316 [Q21502408]" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType[dfValueType['node1'] == 'P1659']" ] }, { "cell_type": "code", "execution_count": 42, "id": "danish-blackberry", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
2031P991P2308[Q5, Q7210356]
2032P991P2309[Q21503252]
\n", "
" ], "text/plain": [ " node1 label node2\n", "2031 P991 P2308 [Q5, Q7210356]\n", "2032 P991 P2309 [Q21503252]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType[dfValueType.node1 == 'P991']" ] }, { "cell_type": "markdown", "id": "digital-harvard", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 43, "id": "white-badge", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bb623e1d72164970a40f3bc2d9ab6346", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/932 [00:00(node2), \" + parentFile + \": (node2)-[]->(nodex), P279star: (nodex)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/const111_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\")\n", " \n", " cnt += 1\n", " except:\n", " print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 3, "id": "naughty-brown", "metadata": {}, "outputs": [], "source": [ "!zgrep -P \"Q98970042\\t\" ../../wikidata-20210215/derived.P279star.tsv.gz" ] }, { "cell_type": "code", "execution_count": 44, "id": "qualified-cursor", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "897" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 45, "id": "simplified-cameroon", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,9):\n", "# os.system(\"screen -dm sh ../../propertiesSplit/checkViolations/valueTypeConstraintValidator_xverify3\"+str(i)+\".sh\")\n", " " ] }, { "cell_type": "markdown", "id": "spectacular-warner", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 21, "id": "valid-defense", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c2c49c85190949669392e7e39bec677f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "285e1c2ed86c4e27be672d341374c5a8", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/212 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2302422110[../../allConstraintsAnalysis/valuetypeConstra...0.000000
P309246201[../../allConstraintsAnalysis/valuetypeConstra...0.000216
P3096111502[../../allConstraintsAnalysis/valuetypeConstra...0.000179
P31563480[../../allConstraintsAnalysis/valuetypeConstra...0.000000
P8747663[../../allConstraintsAnalysis/valuetypeConstra...0.000629
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P2302 42211 0 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P3092 4620 1 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P3096 11150 2 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P3156 348 0 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P87 4766 3 [../../allConstraintsAnalysis/valuetypeConstra... \n", "\n", " violation_ratio \n", "P2302 0.000000 \n", "P3092 0.000216 \n", "P3096 0.000179 \n", "P3156 0.000000 \n", "P87 0.000629 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF.head()" ] }, { "cell_type": "code", "execution_count": 27, "id": "neural-trail", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P50080331026[../../allConstraintsAnalysis/valuetypeConstra...1.0
P610409764[../../allConstraintsAnalysis/valuetypeConstra...1.0
P254501369[../../allConstraintsAnalysis/valuetypeConstra...1.0
P26680168[../../allConstraintsAnalysis/valuetypeConstra...1.0
P7374032[../../allConstraintsAnalysis/valuetypeConstra...1.0
P2839014[../../allConstraintsAnalysis/valuetypeConstra...1.0
P3028013[../../allConstraintsAnalysis/valuetypeConstra...1.0
P3027012[../../allConstraintsAnalysis/valuetypeConstra...1.0
P2127011[../../allConstraintsAnalysis/valuetypeConstra...1.0
P538010[../../allConstraintsAnalysis/valuetypeConstra...1.0
P14307[../../allConstraintsAnalysis/valuetypeConstra...1.0
P442506[../../allConstraintsAnalysis/valuetypeConstra...1.0
P619105[../../allConstraintsAnalysis/valuetypeConstra...1.0
P653305[../../allConstraintsAnalysis/valuetypeConstra...1.0
P653405[../../allConstraintsAnalysis/valuetypeConstra...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5008 0 331026 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P6104 0 9764 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P2545 0 1369 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P2668 0 168 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P7374 0 32 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P2839 0 14 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P3028 0 13 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P3027 0 12 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P2127 0 11 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P538 0 10 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P143 0 7 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P4425 0 6 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P6191 0 5 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P6533 0 5 [../../allConstraintsAnalysis/valuetypeConstra... \n", "P6534 0 5 [../../allConstraintsAnalysis/valuetypeConstra... \n", "\n", " violation_ratio \n", "P5008 1.0 \n", "P6104 1.0 \n", "P2545 1.0 \n", "P2668 1.0 \n", "P7374 1.0 \n", "P2839 1.0 \n", "P3028 1.0 \n", "P3027 1.0 \n", "P2127 1.0 \n", "P538 1.0 \n", "P143 1.0 \n", "P4425 1.0 \n", "P6191 1.0 \n", "P6533 1.0 \n", "P6534 1.0 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF.sort_values(by=['violation_ratio','incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": null, "id": "sized-melissa", "metadata": {}, "outputs": [], "source": [ "!cat ../../allConstraintsAnalysis/valuetypeConstraint_Final4/normal/claims.type-constraints.instanceOf.P5008.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 28, "id": "cutting-polyester", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 897.000000\n", "mean 0.083622\n", "std 0.205759\n", "min 0.000000\n", "25% 0.000451\n", "50% 0.006186\n", "75% 0.042042\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 29, "id": "alert-receiver", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Value Type Constraint Violation Ratios')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "valTypeConstDF['violation_ratio'].plot.hist(bins=100).set_title(\"Value Type Constraint Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 30, "id": "italian-motel", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Value Type Constraint Violation Ratios (<=0.04)')" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "valTypeConstDF[valTypeConstDF['violation_ratio'] <= 0.04].violation_ratio.plot.hist(bins=100).set_title(\"Value Type Constraint Violation Ratios (<=0.04)\")" ] }, { "cell_type": "code", "execution_count": 31, "id": "prescription-ceramic", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of constraints whose violation ratio is greater than mean :0/897\n" ] } ], "source": [ "print(f\"No. of constraints whose violation ratio is greater than mean :{sum(valTypeConstDF['violation_ratio'] >= 3.950680)}/{len(valTypeConstDF)}\")" ] }, { "cell_type": "code", "execution_count": 32, "id": "quiet-gardening", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# valTypeConstDF.sort_values(by=['violation_ratio'],ascending=False).head().paths.values" ] }, { "cell_type": "code", "execution_count": 33, "id": "documentary-pipeline", "metadata": {}, "outputs": [], "source": [ "# !head ../../allConstraintsAnalysis/typeConstraint/normal/claims.type-constraints.instanceOf.P7535.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 34, "id": "tutorial-mineral", "metadata": {}, "outputs": [], "source": [ "for key1 in valueTypeConstViolations.keys():\n", " valueTypeConstViolations[key1]['correct'] = valueTypeConstViolations[key1]['instanceOf']['correct'] + valueTypeConstViolations[key1]['subclass']['correct'] + valueTypeConstViolations[key1]['instanceOfOrSubclass']['correct']\n", " valueTypeConstViolations[key1]['incorrect'] = valueTypeConstViolations[key1]['instanceOf']['incorrect'] + valueTypeConstViolations[key1]['subclass']['incorrect'] + valueTypeConstViolations[key1]['instanceOfOrSubclass']['incorrect']\n", " valueTypeConstViolations[key1]['VR'] = valueTypeConstViolations[key1]['incorrect'] / (valueTypeConstViolations[key1]['correct'] + valueTypeConstViolations[key1]['incorrect'])\n", " " ] }, { "cell_type": "code", "execution_count": 35, "id": "satellite-concern", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "{'mandatory': {'instanceOf': {'correct': 11391695, 'incorrect': 3339},\n", " 'subclass': {'correct': 44764, 'incorrect': 1},\n", " 'instanceOfOrSubclass': {'correct': 11638, 'incorrect': 35},\n", " 'propCount': 106,\n", " 'correct': 11448097,\n", " 'incorrect': 3375,\n", " 'VR': 0.00029472193618427394},\n", " 'suggestion': {'instanceOf': {'correct': 46036, 'incorrect': 474},\n", " 'subclass': {'correct': 118, 'incorrect': 18},\n", " 'instanceOfOrSubclass': {'correct': 0, 'incorrect': 0},\n", " 'propCount': 5,\n", " 'correct': 46154,\n", " 'incorrect': 492,\n", " 'VR': 0.01054752819105604},\n", " 'normal': {'instanceOf': {'correct': 88858176, 'incorrect': 689971},\n", " 'subclass': {'correct': 4373665, 'incorrect': 6190},\n", " 'instanceOfOrSubclass': {'correct': 76234047, 'incorrect': 163783},\n", " 'propCount': 786,\n", " 'correct': 169465888,\n", " 'incorrect': 859944,\n", " 'VR': 0.00504881725750208}}" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valueTypeConstViolations" ] }, { "cell_type": "code", "execution_count": 36, "id": "purple-grill", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF.to_csv('../../allConstraintsAnalysis/valueTypeConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "traditional-shakespeare", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 35, "id": "spoken-symphony", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c60118b9694f4447b22fbe6ab5e113ff", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/313 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for value type constraint checks\")" ] }, { "cell_type": "markdown", "id": "motivated-sympathy", "metadata": {}, "source": [ "## Item Requires Statement Constraint" ] }, { "cell_type": "markdown", "id": "chubby-glass", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 37, "id": "funny-batch", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/itemRequiresConstraint/claims.type-constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 38, "id": "original-expression", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 39, "id": "adequate-symphony", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2306', 'P2305', 'P2316', 'P2304', 'P2303', 'P6607', 'P4155',\n", " 'P31', 'P2916', 'P4680', 'P2308'], dtype=object)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 40, "id": "infrared-canal", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2306 7182\n", "P2305 2540\n", "P2316 2523\n", "P2303 422\n", "P2304 14\n", "P6607 14\n", "P2916 5\n", "P4680 2\n", "P2308 1\n", "P4155 1\n", "P31 1\n", "Name: label, dtype: int64" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 41, "id": "focused-karen", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 42, "id": "private-boundary", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1id
P1006P1006-P2302-Q21503247-0451ef47-0NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
P1010P1010-P2302-Q21503247-56183614-0NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1010-P2302-Q21503247-fd256eaf-0NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1015P1015-P2302-Q21503247-20e3bfc5-0NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1017P1017-P2302-Q21503247-bbac2ce3-0NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 \\\n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN NaN NaN [P214] NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN NaN NaN [P31] NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 NaN NaN [Q794] [P17] NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN NaN NaN [P31] NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN NaN NaN [P214] NaN \n", "\n", "label P2316 P2916 P31 P4155 P4680 \\\n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN NaN NaN NaN NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN NaN NaN NaN NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 [Q21502408] NaN NaN NaN NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN NaN NaN NaN NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN NaN NaN NaN NaN \n", "\n", "label P6607 \n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 43, "id": "conceptual-schedule", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 44, "id": "third-hayes", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P1006NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
P1010NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1010NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1015NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1017NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 P2316 P2916 P31 P4155 P4680 \\\n", "node1 \n", "P1006 NaN NaN NaN [P214] NaN NaN NaN NaN NaN NaN \n", "P1010 NaN NaN NaN [P31] NaN NaN NaN NaN NaN NaN \n", "P1010 NaN NaN [Q794] [P17] NaN [Q21502408] NaN NaN NaN NaN \n", "P1015 NaN NaN NaN [P31] NaN NaN NaN NaN NaN NaN \n", "P1017 NaN NaN NaN [P214] NaN NaN NaN NaN NaN NaN \n", "\n", "label P6607 \n", "node1 \n", "P1006 NaN \n", "P1010 NaN \n", "P1010 NaN \n", "P1015 NaN \n", "P1017 NaN " ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "shaped-companion", "metadata": {}, "source": [ "However, there is one anomaly where the property does not have a co-dependency constraint associated with it, but still has a link to this constraint." ] }, { "cell_type": "code", "execution_count": 45, "id": "indian-journal", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P5447NaNNaN[Q55426051][P5446]NaNNaNNaNNaNNaN[Q46466783]NaN
P5448NaNNaN[Q55426051][P5446]NaNNaNNaNNaNNaN[Q46466783]NaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 P2316 P2916 P31 P4155 \\\n", "node1 \n", "P5447 NaN NaN [Q55426051] [P5446] NaN NaN NaN NaN NaN \n", "P5448 NaN NaN [Q55426051] [P5446] NaN NaN NaN NaN NaN \n", "\n", "label P4680 P6607 \n", "node1 \n", "P5447 [Q46466783] NaN \n", "P5448 [Q46466783] NaN " ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires[dfItemRequires['P4680'].apply(lambda p: type(p) == list)]" ] }, { "cell_type": "code", "execution_count": 46, "id": "discrete-template", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P1010NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1045NaNNaN[Q20808382, Q28218485, Q3044918][P39]NaNNaNNaNNaNNaNNaNNaN
P1045NaNNaN[Q82955][P106]NaNNaNNaNNaNNaNNaNNaN
P1045NaNNaN[Q5][P31]NaN[Q21502408]NaNNaNNaNNaNNaN
P1045NaNNaN[Q142, Q71084][P27]NaNNaNNaNNaNNaNNaNNaN
....................................
P980NaNNaN[Q34][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P981NaNNaN[Q55][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P981NaNNaN[Q1852859][P31]NaNNaNNaNNaNNaNNaNNaN
P988NaNNaN[Q928][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P990[Q49678, Q853715]NaN[Q5][P31]NaNNaNNaNNaNNaNNaNNaN
\n", "

2540 rows × 11 columns

\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 \\\n", "node1 \n", "P1010 NaN NaN [Q794] [P17] \n", "P1045 NaN NaN [Q20808382, Q28218485, Q3044918] [P39] \n", "P1045 NaN NaN [Q82955] [P106] \n", "P1045 NaN NaN [Q5] [P31] \n", "P1045 NaN NaN [Q142, Q71084] [P27] \n", "... ... ... ... ... \n", "P980 NaN NaN [Q34] [P17] \n", "P981 NaN NaN [Q55] [P17] \n", "P981 NaN NaN [Q1852859] [P31] \n", "P988 NaN NaN [Q928] [P17] \n", "P990 [Q49678, Q853715] NaN [Q5] [P31] \n", "\n", "label P2308 P2316 P2916 P31 P4155 P4680 P6607 \n", "node1 \n", "P1010 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "P1045 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "... ... ... ... ... ... ... ... \n", "P980 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P981 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P981 NaN NaN NaN NaN NaN NaN NaN \n", "P988 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P990 NaN NaN NaN NaN NaN NaN NaN \n", "\n", "[2540 rows x 11 columns]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires[dfItemRequires['P2305'].apply(lambda p: type(p) == list)]" ] }, { "cell_type": "markdown", "id": "forced-christmas", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "markdown", "id": "acquired-floor", "metadata": {}, "source": [ "#### Version 1 - Mandatory + Suggestion + Normal" ] }, { "cell_type": "code", "execution_count": 86, "id": "turkish-establishment", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "37c6488e55b04219bea1391f9dfa247e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory + suggestion + normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 20 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 11, "id": "peripheral-herald", "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'cnt' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcnt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'cnt' is not defined" ] } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 88, "id": "incorporated-logistics", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "27" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fCnt" ] }, { "cell_type": "code", "execution_count": 123, "id": "welcome-welding", "metadata": {}, "outputs": [], "source": [ "# from tqdm.notebook import tqdm\n", "# import os.path\n", "# import os\n", "# folderName = 'codependencyConstraint'\n", "# for prop in tqdm(dfItemRequires.index.unique()):\n", "# for subFolderName in ['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal']:\n", "# if os.path.isfile(\"../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv\") and \\\n", "# os.path.isfile(\"../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv\"):\n", "# os.system(\"kgtk cat -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", "# ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", "# -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_w_exceptions.tsv\")" ] }, { "cell_type": "code", "execution_count": 16, "id": "optimum-blowing", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,28):\n", "# os.system(\"screen -dm sh ../../propertiesSplit/checkViolations/codepConst_MSN_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "indoor-verse", "metadata": {}, "source": [ "#### Version 2 - Mandatory + Normal" ] }, { "cell_type": "code", "execution_count": 89, "id": "furnished-paradise", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "edc18c3904494784866a2d8be0d744f4", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory + normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 20 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 14, "id": "searching-individual", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "468" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 18, "id": "silver-clarity", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,25):\n", "# os.system(\"screen -dm sh ../../propertiesSplit/checkViolations/codepConst_MN_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "prescription-access", "metadata": {}, "source": [ "#### Version 3 - Mandatory" ] }, { "cell_type": "code", "execution_count": 90, "id": "married-porter", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2bff77e903334ba7aac27fa465eb5541", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 20 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 91, "id": "according-blackberry", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "78" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 17, "id": "extraordinary-drawing", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,5):\n", "# os.system(\"screen -dm sh ../../propertiesSplit/checkViolations/codepConst_M_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "subsequent-brown", "metadata": {}, "source": [ "#### Version 4 - Normal" ] }, { "cell_type": "code", "execution_count": 92, "id": "operational-migration", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "23671bc603f64352967ca76b3f0a1aba", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 20 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 93, "id": "harmful-binary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "418" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 19, "id": "advance-married", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,23):\n", "# os.system(\"screen -dm sh ../../propertiesSplit/checkViolations/codepConst_N_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "ranging-journal", "metadata": {}, "source": [ "#### Version 5 - Suggestion" ] }, { "cell_type": "code", "execution_count": 16, "id": "missing-jordan", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bdc942cb17d9460fa2421475c3489d1a", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = suggestion\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplit/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 20 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 17, "id": "soviet-forth", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "97" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 18, "id": "racial-stationery", "metadata": {}, "outputs": [], "source": [ "import os\n", "for i in range(1,6):\n", " os.system(\"screen -dm sh ../../propertiesSplit/checkViolations/codepConst_S_Validator_new_3_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "structural-envelope", "metadata": {}, "source": [ "### Merge all correct/incorrect outputs" ] }, { "cell_type": "code", "execution_count": 17, "id": "joined-invention", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "57dcbdd4c8014c9288dbb92b331a05a6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# import os\n", "# from tqdm.notebook import tqdm\n", "\n", "# for folder in tqdm(iter(['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal'])):\n", "# folderPath = \"../../allConstraintsAnalysis/codependencyConstraint/\" + folder + \"/\"\n", "# correct_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".correct.\" in f, os.listdir(folderPath))])\n", "# incorrect_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".incorrect.\" in f, os.listdir(folderPath))])\n", "# # print(files_list)\n", "# os.system(\"{ kgtk cat -i \"+ correct_files_list + \" -o \"+folderPath+\"claims.all.correctSuperSet.tsv -v True; } 2> \"+folderPath+\"claims.all.correctSuperSet.log\")\n", "# os.system(\"{ kgtk cat -i \"+ incorrect_files_list + \" -o \"+folderPath+\"claims.all.incorrectSuperSet.tsv -v True; } 2> \"+folderPath+\"claims.all.incorrectSuperSet.log\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "stopped-bolivia", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "68395f72036a469fad8908d916303bcd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# import os\n", "# from tqdm.notebook import tqdm\n", "\n", "# for folder in tqdm(iter(['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal'])):\n", "# folderPath = \"../../allConstraintsAnalysis/codependencyConstraint_Final/\" + folder + \"/\"\n", "# correct_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".correct.\" in f, os.listdir(folderPath))])\n", "# incorrect_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".incorrect.\" in f, os.listdir(folderPath))])\n", "# # print(files_list)\n", "# os.system(\"{ kgtk cat -i \"+ correct_files_list + \" -o \"+folderPath+\"claims.all.correctSuperSet.tsv; } 2> \"+folderPath+\"claims.all.correctSuperSet.log\")\n", "# os.system(\"{ kgtk cat -i \"+ incorrect_files_list + \" -o \"+folderPath+\"claims.all.incorrectSuperSet.tsv; } 2> \"+folderPath+\"claims.all.incorrectSuperSet.log\")" ] }, { "cell_type": "code", "execution_count": null, "id": "criminal-central", "metadata": {}, "outputs": [], "source": [ "# for folder in tqdm(iter(['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal'])):\n", "# folderPath = \"../../allConstraintsAnalysis/codependencyConstraint/\" + folder + \"/\"\n", "# folderPathNew = \"../../allConstraintsAnalysis/codependencyConstraint_Final/\" + folder + \"/\"\n", "# os.system(f\"screen -dm kgtk ifnotexists -i {folderPathNew}claims.all.correctSuperSet.tsv --filter-on {folderPath}claims.all.correctSuperSet.tsv -o {folderPathNew}claims.all.correctSuperSet.diff.tsv\")\n", "# os.system(f\"screen -dm kgtk ifnotexists -i {folderPathNew}claims.all.incorrectSuperSet.tsv --filter-on {folderPath}claims.all.incorrectSuperSet.tsv -o {folderPathNew}claims.all.incorrectSuperSet.diff.tsv\")\n", " " ] }, { "cell_type": "markdown", "id": "homeless-pleasure", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 1, "id": "welcome-dependence", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4bb32dc855d74908a5712f7386539c70", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5093ebb08fb3417f8437078912c62872", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1192 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P101815628[../../allConstraintsAnalysis/codependencyCons...
P1191760182477[../../allConstraintsAnalysis/codependencyCons...
P1629753188[../../allConstraintsAnalysis/codependencyCons...
P146435000831[../../allConstraintsAnalysis/codependencyCons...
P134511348[../../allConstraintsAnalysis/codependencyCons...
............
P19263711424855[../../allConstraintsAnalysis/codependencyCons...
P28736428[../../allConstraintsAnalysis/codependencyCons...
P2875354716[../../allConstraintsAnalysis/codependencyCons...
P28765114[../../allConstraintsAnalysis/codependencyCons...
P290021844813[../../allConstraintsAnalysis/codependencyCons...
\n", "

527 rows × 3 columns

\n", "" ], "text/plain": [ " correct incorrect paths\n", "P1018 156 28 [../../allConstraintsAnalysis/codependencyCons...\n", "P119 176018 2477 [../../allConstraintsAnalysis/codependencyCons...\n", "P1629 7531 88 [../../allConstraintsAnalysis/codependencyCons...\n", "P1464 35000 831 [../../allConstraintsAnalysis/codependencyCons...\n", "P1345 113 48 [../../allConstraintsAnalysis/codependencyCons...\n", "... ... ... ...\n", "P19 2637114 24855 [../../allConstraintsAnalysis/codependencyCons...\n", "P2873 642 8 [../../allConstraintsAnalysis/codependencyCons...\n", "P2875 3547 16 [../../allConstraintsAnalysis/codependencyCons...\n", "P2876 51 14 [../../allConstraintsAnalysis/codependencyCons...\n", "P2900 21844 813 [../../allConstraintsAnalysis/codependencyCons...\n", "\n", "[527 rows x 3 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1" ] }, { "cell_type": "code", "execution_count": 7, "id": "powered-residence", "metadata": {}, "outputs": [], "source": [ "codepConstDF1['violation_ratio'] = codepConstDF1.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)" ] }, { "cell_type": "code", "execution_count": 8, "id": "chinese-pressing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P1111046327[../../allConstraintsAnalysis/codependencyCons...1.0
P2302042211[../../allConstraintsAnalysis/codependencyCons...1.0
P30630549[../../allConstraintsAnalysis/codependencyCons...1.0
P2303039[../../allConstraintsAnalysis/codependencyCons...1.0
P5447023[../../allConstraintsAnalysis/codependencyCons...1.0
P5448023[../../allConstraintsAnalysis/codependencyCons...1.0
P2308017[../../allConstraintsAnalysis/codependencyCons...1.0
P756908[../../allConstraintsAnalysis/codependencyCons...1.0
P790307[../../allConstraintsAnalysis/codependencyCons...1.0
P57404[../../allConstraintsAnalysis/codependencyCons...1.0
P230603[../../allConstraintsAnalysis/codependencyCons...1.0
P291601[../../allConstraintsAnalysis/codependencyCons...1.0
P243301[../../allConstraintsAnalysis/codependencyCons...1.0
P826401[../../allConstraintsAnalysis/codependencyCons...1.0
P230701[../../allConstraintsAnalysis/codependencyCons...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1111 0 46327 [../../allConstraintsAnalysis/codependencyCons... \n", "P2302 0 42211 [../../allConstraintsAnalysis/codependencyCons... \n", "P3063 0 549 [../../allConstraintsAnalysis/codependencyCons... \n", "P2303 0 39 [../../allConstraintsAnalysis/codependencyCons... \n", "P5447 0 23 [../../allConstraintsAnalysis/codependencyCons... \n", "P5448 0 23 [../../allConstraintsAnalysis/codependencyCons... \n", "P2308 0 17 [../../allConstraintsAnalysis/codependencyCons... \n", "P7569 0 8 [../../allConstraintsAnalysis/codependencyCons... \n", "P7903 0 7 [../../allConstraintsAnalysis/codependencyCons... \n", "P574 0 4 [../../allConstraintsAnalysis/codependencyCons... \n", "P2306 0 3 [../../allConstraintsAnalysis/codependencyCons... \n", "P2916 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P2433 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P8264 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P2307 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "\n", " violation_ratio \n", "P1111 1.0 \n", "P2302 1.0 \n", "P3063 1.0 \n", "P2303 1.0 \n", "P5447 1.0 \n", "P5448 1.0 \n", "P2308 1.0 \n", "P7569 1.0 \n", "P7903 1.0 \n", "P574 1.0 \n", "P2306 1.0 \n", "P2916 1.0 \n", "P2433 1.0 \n", "P8264 1.0 \n", "P2307 1.0 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1.sort_values(by=['violation_ratio', 'incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 9, "id": "armed-constitution", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../../allConstraintsAnalysis/codependencyConstraint_Final/Mand_Sugg_Normal/claims.P2302.correct.tsv',\n", " '../../allConstraintsAnalysis/codependencyConstraint_Final/Mand_Sugg_Normal/claims.P2302.incorrect.tsv']" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(codepConstDF1.loc['P2302']['paths'])" ] }, { "cell_type": "code", "execution_count": 10, "id": "continued-desire", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "P10-P2302-Q21502404-d012aef4-0\tP10\tP2302\tQ21502404\tnormal\twikibase-item\r\n", "P10-P2302-Q21510851-5224fe0b-0\tP10\tP2302\tQ21510851\tnormal\twikibase-item\r\n", "P10-P2302-Q21510852-dde2f0ce-0\tP10\tP2302\tQ21510852\tnormal\twikibase-item\r\n", "P10-P2302-Q52004125-d0288d06-0\tP10\tP2302\tQ52004125\tnormal\twikibase-item\r\n", "P10-P2302-Q53869507-974ce3b1-0\tP10\tP2302\tQ53869507\tnormal\twikibase-item\r\n", "P1000-P2302-Q21510856-b2772a67-0\tP1000\tP2302\tQ21510856\tnormal\twikibase-item\r\n", "P1000-P2302-Q21510865-1f5093e9-0\tP1000\tP2302\tQ21510865\tnormal\twikibase-item\r\n", "P1000-P2302-Q53869507-36dbee67-0\tP1000\tP2302\tQ53869507\tnormal\twikibase-item\r\n", "P1001-P2302-Q21502838-3cc7ade2-0\tP1001\tP2302\tQ21502838\tnormal\twikibase-item\r\n" ] } ], "source": [ "!head ../../allConstraintsAnalysis/codependencyConstraint_Final/Mand_Sugg_Normal/claims.P2302.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 11, "id": "demonstrated-debut", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P22142962988711699[../../allConstraintsAnalysis/codependencyCons...0.193676
P7342001246704728[../../allConstraintsAnalysis/codependencyCons...0.260434
P43331028893435483[../../allConstraintsAnalysis/codependencyCons...0.013841
P1951132062384602[../../allConstraintsAnalysis/codependencyCons...0.253584
P5694646728241105[../../allConstraintsAnalysis/codependencyCons...0.049328
P13110056935198870[../../allConstraintsAnalysis/codependencyCons...0.019391
P2755955123578[../../allConstraintsAnalysis/codependencyCons...0.954027
P2860174402886114713[../../allConstraintsAnalysis/codependencyCons...0.000657
P570233261194455[../../allConstraintsAnalysis/codependencyCons...0.038917
P20178479290131[../../allConstraintsAnalysis/codependencyCons...0.515261
P1435189387479479[../../allConstraintsAnalysis/codependencyCons...0.040276
P19223045163440[../../allConstraintsAnalysis/codependencyCons...0.675677
P7084525354258[../../allConstraintsAnalysis/codependencyCons...0.545246
P19711752249904[../../allConstraintsAnalysis/codependencyCons...0.298066
P15983654446915[../../allConstraintsAnalysis/codependencyCons...0.562132
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2214 2962988 711699 [../../allConstraintsAnalysis/codependencyCons... \n", "P734 2001246 704728 [../../allConstraintsAnalysis/codependencyCons... \n", "P433 31028893 435483 [../../allConstraintsAnalysis/codependencyCons... \n", "P195 1132062 384602 [../../allConstraintsAnalysis/codependencyCons... \n", "P569 4646728 241105 [../../allConstraintsAnalysis/codependencyCons... \n", "P131 10056935 198870 [../../allConstraintsAnalysis/codependencyCons... \n", "P275 5955 123578 [../../allConstraintsAnalysis/codependencyCons... \n", "P2860 174402886 114713 [../../allConstraintsAnalysis/codependencyCons... \n", "P570 2332611 94455 [../../allConstraintsAnalysis/codependencyCons... \n", "P2017 84792 90131 [../../allConstraintsAnalysis/codependencyCons... \n", "P1435 1893874 79479 [../../allConstraintsAnalysis/codependencyCons... \n", "P1922 30451 63440 [../../allConstraintsAnalysis/codependencyCons... \n", "P708 45253 54258 [../../allConstraintsAnalysis/codependencyCons... \n", "P197 117522 49904 [../../allConstraintsAnalysis/codependencyCons... \n", "P1598 36544 46915 [../../allConstraintsAnalysis/codependencyCons... \n", "\n", " violation_ratio \n", "P2214 0.193676 \n", "P734 0.260434 \n", "P433 0.013841 \n", "P195 0.253584 \n", "P569 0.049328 \n", "P131 0.019391 \n", "P275 0.954027 \n", "P2860 0.000657 \n", "P570 0.038917 \n", "P2017 0.515261 \n", "P1435 0.040276 \n", "P1922 0.675677 \n", "P708 0.545246 \n", "P197 0.298066 \n", "P1598 0.562132 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 12, "id": "developed-zimbabwe", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 527.000000\n", "mean 0.206083\n", "std 0.305674\n", "min 0.000000\n", "25% 0.000929\n", "50% 0.032847\n", "75% 0.298101\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 13, "id": "unknown-johnston", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 1 - Violation Ratios')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF1['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 1 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 14, "id": "exceptional-dakota", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 1 - Violation Ratios <= 0.5')" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF1[codepConstDF1['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 1 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "interior-joseph", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of properties whose violation ratio is greater than mean: 0/527\n" ] } ], "source": [ "print(f\"No. of properties whose violation ratio is greater than mean: {sum(codepConstDF1['violation_ratio'] >= 3.539484)}/{len(codepConstDF1)}\")" ] }, { "cell_type": "code", "execution_count": 16, "id": "variable-desert", "metadata": {}, "outputs": [], "source": [ "codepConstDF1.to_csv('../../allConstraintsAnalysis/codepConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "greater-genetics", "metadata": {}, "source": [ "#### Version 2 - Mand Normal" ] }, { "cell_type": "code", "execution_count": 179, "id": "constant-chance", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF2 = pd.DataFrame(codepConstViolations['Mand_Normal']).T" ] }, { "cell_type": "code", "execution_count": 180, "id": "included-adjustment", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P1196850373039[../../allConstraintsAnalysis/codependencyCons...
P13833172641[../../allConstraintsAnalysis/codependencyCons...
P101815628[../../allConstraintsAnalysis/codependencyCons...
P1538194334152[../../allConstraintsAnalysis/codependencyCons...
P168543210[../../allConstraintsAnalysis/codependencyCons...
............
P2962268032[../../allConstraintsAnalysis/codependencyCons...
P18319026140[../../allConstraintsAnalysis/codependencyCons...
P199924030[../../allConstraintsAnalysis/codependencyCons...
P2009101915[../../allConstraintsAnalysis/codependencyCons...
P137679449106[../../allConstraintsAnalysis/codependencyCons...
\n", "

468 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P1196 85037 3039 [../../allConstraintsAnalysis/codependencyCons...\n", "P1383 31726 41 [../../allConstraintsAnalysis/codependencyCons...\n", "P1018 156 28 [../../allConstraintsAnalysis/codependencyCons...\n", "P1538 194334 152 [../../allConstraintsAnalysis/codependencyCons...\n", "P1685 4321 0 [../../allConstraintsAnalysis/codependencyCons...\n", "... ... ... ...\n", "P2962 26803 2 [../../allConstraintsAnalysis/codependencyCons...\n", "P183 19026 140 [../../allConstraintsAnalysis/codependencyCons...\n", "P1999 2403 0 [../../allConstraintsAnalysis/codependencyCons...\n", "P2009 1019 15 [../../allConstraintsAnalysis/codependencyCons...\n", "P1376 79449 106 [../../allConstraintsAnalysis/codependencyCons...\n", "\n", "[468 rows x 3 columns]" ] }, "execution_count": 180, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2" ] }, { "cell_type": "code", "execution_count": 181, "id": "fundamental-knowing", "metadata": {}, "outputs": [], "source": [ "codepConstDF2['violation_ratio'] = codepConstDF2.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)" ] }, { "cell_type": "code", "execution_count": 182, "id": "harmful-discipline", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P230701[../../allConstraintsAnalysis/codependencyCons...1.0
P1111046327[../../allConstraintsAnalysis/codependencyCons...1.0
P756908[../../allConstraintsAnalysis/codependencyCons...1.0
P291601[../../allConstraintsAnalysis/codependencyCons...1.0
P230901[../../allConstraintsAnalysis/codependencyCons...1.0
P790307[../../allConstraintsAnalysis/codependencyCons...1.0
P5447023[../../allConstraintsAnalysis/codependencyCons...1.0
P231101[../../allConstraintsAnalysis/codependencyCons...1.0
P243301[../../allConstraintsAnalysis/codependencyCons...1.0
P231301[../../allConstraintsAnalysis/codependencyCons...1.0
P5448023[../../allConstraintsAnalysis/codependencyCons...1.0
P2308017[../../allConstraintsAnalysis/codependencyCons...1.0
P2303039[../../allConstraintsAnalysis/codependencyCons...1.0
P230603[../../allConstraintsAnalysis/codependencyCons...1.0
P231201[../../allConstraintsAnalysis/codependencyCons...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2307 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P1111 0 46327 [../../allConstraintsAnalysis/codependencyCons... \n", "P7569 0 8 [../../allConstraintsAnalysis/codependencyCons... \n", "P2916 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P2309 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P7903 0 7 [../../allConstraintsAnalysis/codependencyCons... \n", "P5447 0 23 [../../allConstraintsAnalysis/codependencyCons... \n", "P2311 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P2433 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P2313 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P5448 0 23 [../../allConstraintsAnalysis/codependencyCons... \n", "P2308 0 17 [../../allConstraintsAnalysis/codependencyCons... \n", "P2303 0 39 [../../allConstraintsAnalysis/codependencyCons... \n", "P2306 0 3 [../../allConstraintsAnalysis/codependencyCons... \n", "P2312 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "\n", " violation_ratio \n", "P2307 1.0 \n", "P1111 1.0 \n", "P7569 1.0 \n", "P2916 1.0 \n", "P2309 1.0 \n", "P7903 1.0 \n", "P5447 1.0 \n", "P2311 1.0 \n", "P2433 1.0 \n", "P2313 1.0 \n", "P5448 1.0 \n", "P2308 1.0 \n", "P2303 1.0 \n", "P2306 1.0 \n", "P2312 1.0 " ] }, "execution_count": 182, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 183, "id": "unlikely-chamber", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P22142962988711699[../../allConstraintsAnalysis/codependencyCons...0.193676
P43331028893435483[../../allConstraintsAnalysis/codependencyCons...0.013841
P2755955123578[../../allConstraintsAnalysis/codependencyCons...0.954027
P2860174402886114713[../../allConstraintsAnalysis/codependencyCons...0.000657
P1435189387479479[../../allConstraintsAnalysis/codependencyCons...0.040276
P7084525354258[../../allConstraintsAnalysis/codependencyCons...0.545246
P19711752249904[../../allConstraintsAnalysis/codependencyCons...0.298066
P15983697846481[../../allConstraintsAnalysis/codependencyCons...0.556932
P1111046327[../../allConstraintsAnalysis/codependencyCons...1.000000
P2248402041566[../../allConstraintsAnalysis/codependencyCons...0.911815
P2325407140611[../../allConstraintsAnalysis/codependencyCons...0.908889
P856123929238026[../../allConstraintsAnalysis/codependencyCons...0.029770
P2243402536540[../../allConstraintsAnalysis/codependencyCons...0.900777
P2244402736527[../../allConstraintsAnalysis/codependencyCons...0.900700
P41335779333607[../../allConstraintsAnalysis/codependencyCons...0.085864
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2214 2962988 711699 [../../allConstraintsAnalysis/codependencyCons... \n", "P433 31028893 435483 [../../allConstraintsAnalysis/codependencyCons... \n", "P275 5955 123578 [../../allConstraintsAnalysis/codependencyCons... \n", "P2860 174402886 114713 [../../allConstraintsAnalysis/codependencyCons... \n", "P1435 1893874 79479 [../../allConstraintsAnalysis/codependencyCons... \n", "P708 45253 54258 [../../allConstraintsAnalysis/codependencyCons... \n", "P197 117522 49904 [../../allConstraintsAnalysis/codependencyCons... \n", "P1598 36978 46481 [../../allConstraintsAnalysis/codependencyCons... \n", "P1111 0 46327 [../../allConstraintsAnalysis/codependencyCons... \n", "P2248 4020 41566 [../../allConstraintsAnalysis/codependencyCons... \n", "P2325 4071 40611 [../../allConstraintsAnalysis/codependencyCons... \n", "P856 1239292 38026 [../../allConstraintsAnalysis/codependencyCons... \n", "P2243 4025 36540 [../../allConstraintsAnalysis/codependencyCons... \n", "P2244 4027 36527 [../../allConstraintsAnalysis/codependencyCons... \n", "P413 357793 33607 [../../allConstraintsAnalysis/codependencyCons... \n", "\n", " violation_ratio \n", "P2214 0.193676 \n", "P433 0.013841 \n", "P275 0.954027 \n", "P2860 0.000657 \n", "P1435 0.040276 \n", "P708 0.545246 \n", "P197 0.298066 \n", "P1598 0.556932 \n", "P1111 1.000000 \n", "P2248 0.911815 \n", "P2325 0.908889 \n", "P856 0.029770 \n", "P2243 0.900777 \n", "P2244 0.900700 \n", "P413 0.085864 " ] }, "execution_count": 183, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 184, "id": "violent-match", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 468.000000\n", "mean 0.169189\n", "std 0.285155\n", "min 0.000000\n", "25% 0.000663\n", "50% 0.016656\n", "75% 0.193986\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 184, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 185, "id": "educational-thickness", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 2 - Violation Ratios')" ] }, "execution_count": 185, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF2['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 2 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 186, "id": "latin-mitchell", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 2 - Violation Ratios <= 0.5')" ] }, "execution_count": 186, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF2[codepConstDF2['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 2 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": 187, "id": "asian-forwarding", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of properties whose violation ratio is greater than mean: 0/468\n" ] } ], "source": [ "print(f\"No. of properties whose violation ratio is greater than mean: {sum(codepConstDF2['violation_ratio'] >= 2.290915)}/{len(codepConstDF2)}\")" ] }, { "cell_type": "markdown", "id": "destroyed-flash", "metadata": {}, "source": [ "#### Version 3 - Mand" ] }, { "cell_type": "code", "execution_count": 188, "id": "consecutive-plenty", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF3 = pd.DataFrame(codepConstViolations['Mand']).T" ] }, { "cell_type": "code", "execution_count": 189, "id": "digital-mileage", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P1081123000[../../allConstraintsAnalysis/codependencyCons...
P202123031[../../allConstraintsAnalysis/codependencyCons...
P37443110[../../allConstraintsAnalysis/codependencyCons...
P598213260[../../allConstraintsAnalysis/codependencyCons...
P38155800[../../allConstraintsAnalysis/codependencyCons...
............
P187916950[../../allConstraintsAnalysis/codependencyCons...
P364870[../../allConstraintsAnalysis/codependencyCons...
P19716739234[../../allConstraintsAnalysis/codependencyCons...
P199076191[../../allConstraintsAnalysis/codependencyCons...
P2009101915[../../allConstraintsAnalysis/codependencyCons...
\n", "

78 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P1081 12300 0 [../../allConstraintsAnalysis/codependencyCons...\n", "P2021 2303 1 [../../allConstraintsAnalysis/codependencyCons...\n", "P3744 311 0 [../../allConstraintsAnalysis/codependencyCons...\n", "P5982 1326 0 [../../allConstraintsAnalysis/codependencyCons...\n", "P3815 580 0 [../../allConstraintsAnalysis/codependencyCons...\n", "... ... ... ...\n", "P1879 1695 0 [../../allConstraintsAnalysis/codependencyCons...\n", "P3648 7 0 [../../allConstraintsAnalysis/codependencyCons...\n", "P197 167392 34 [../../allConstraintsAnalysis/codependencyCons...\n", "P1990 7619 1 [../../allConstraintsAnalysis/codependencyCons...\n", "P2009 1019 15 [../../allConstraintsAnalysis/codependencyCons...\n", "\n", "[78 rows x 3 columns]" ] }, "execution_count": 189, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3" ] }, { "cell_type": "code", "execution_count": 190, "id": "formed-battle", "metadata": {}, "outputs": [], "source": [ "codepConstDF3['violation_ratio'] = codepConstDF3.apply(lambda p: p.incorrect / p.correct if p.correct != 0 else p.incorrect/100, axis=1)" ] }, { "cell_type": "code", "execution_count": 191, "id": "numerous-construction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P5051163[../../allConstraintsAnalysis/codependencyCons...63.000000
P434171[../../allConstraintsAnalysis/codependencyCons...0.142857
P2095383[../../allConstraintsAnalysis/codependencyCons...0.078947
P3931225064[../../allConstraintsAnalysis/codependencyCons...0.028444
P17313639[../../allConstraintsAnalysis/codependencyCons...0.024793
P2009101915[../../allConstraintsAnalysis/codependencyCons...0.014720
P2461692[../../allConstraintsAnalysis/codependencyCons...0.011834
P826401[../../allConstraintsAnalysis/codependencyCons...0.010000
P9445774[../../allConstraintsAnalysis/codependencyCons...0.006932
P1560323520[../../allConstraintsAnalysis/codependencyCons...0.006182
P26798875[../../allConstraintsAnalysis/codependencyCons...0.005637
P4511050236[../../allConstraintsAnalysis/codependencyCons...0.003428
P236516505[../../allConstraintsAnalysis/codependencyCons...0.003030
P9152836445[../../allConstraintsAnalysis/codependencyCons...0.001587
P16358901[../../allConstraintsAnalysis/codependencyCons...0.001124
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5051 1 63 [../../allConstraintsAnalysis/codependencyCons... \n", "P4341 7 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P2095 38 3 [../../allConstraintsAnalysis/codependencyCons... \n", "P3931 2250 64 [../../allConstraintsAnalysis/codependencyCons... \n", "P1731 363 9 [../../allConstraintsAnalysis/codependencyCons... \n", "P2009 1019 15 [../../allConstraintsAnalysis/codependencyCons... \n", "P246 169 2 [../../allConstraintsAnalysis/codependencyCons... \n", "P8264 0 1 [../../allConstraintsAnalysis/codependencyCons... \n", "P944 577 4 [../../allConstraintsAnalysis/codependencyCons... \n", "P1560 3235 20 [../../allConstraintsAnalysis/codependencyCons... \n", "P2679 887 5 [../../allConstraintsAnalysis/codependencyCons... \n", "P451 10502 36 [../../allConstraintsAnalysis/codependencyCons... \n", "P2365 1650 5 [../../allConstraintsAnalysis/codependencyCons... \n", "P915 28364 45 [../../allConstraintsAnalysis/codependencyCons... \n", "P1635 890 1 [../../allConstraintsAnalysis/codependencyCons... \n", "\n", " violation_ratio \n", "P5051 63.000000 \n", "P4341 0.142857 \n", "P2095 0.078947 \n", "P3931 0.028444 \n", "P1731 0.024793 \n", "P2009 0.014720 \n", "P246 0.011834 \n", "P8264 0.010000 \n", "P944 0.006932 \n", "P1560 0.006182 \n", "P2679 0.005637 \n", "P451 0.003428 \n", "P2365 0.003030 \n", "P915 0.001587 \n", "P1635 0.001124 " ] }, "execution_count": 191, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 192, "id": "identified-marble", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "correct 1980\n", "incorrect 1\n", "paths [../../allConstraintsAnalysis/codependencyCons...\n", "violation_ratio 0.000505\n", "Name: P1713, dtype: object" ] }, "execution_count": 192, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3.loc['P1713']" ] }, { "cell_type": "code", "execution_count": 193, "id": "established-mounting", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "Q4681882-P1713-d878eb-9fff460e-0\tQ4681882\tP1713\t\"https://heightnetworth.com/adele-givens-net-worth-2020/\"\tnormal\turl\r\n" ] } ], "source": [ "!head ../../allConstraintsAnalysis/codependencyConstraint_Final/Mand/claims.P1713.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 77, "id": "naval-functionality", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "Q1000195-P1713-e792ce-9b50511b-0\tQ1000195\tP1713\t\"http://webarchiv.bundestag.de/archive/2007/0206/mdb/mdb13/bio/T/thiesdi0.html\"\tnormal\turl\r\n", "Q100218-P1713-3e741f-4bb9633d-0\tQ100218\tP1713\t\"http://webarchiv.bundestag.de/archive/2013/1212/bundestag/abgeordnete17/biografien/B/bunge_martina.html\"\tnormal\turl\r\n", "Q100250-P1713-33d6e4-326e7a64-0\tQ100250\tP1713\t\"http://webarchiv.bundestag.de/archive/2010/0427/bundestag/abgeordnete/bio/B/bauerwo0.html\"\tnormal\turl\r\n", "Q100357-P1713-f75a30-72e42938-0\tQ100357\tP1713\t\"http://webarchiv.bundestag.de/archive/2007/0206/mdb/mdb14/bio/J/juengsa0.html\"\tnormal\turl\r\n", "Q100615-P1713-cd17a0-c89f234d-0\tQ100615\tP1713\t\"http://webarchiv.bundestag.de/archive/2013/1212/bundestag/abgeordnete17/biografien/P/ploetz_yvonne.html\"\tnormal\turl\r\n", "Q100617-P1713-2396a5-62789180-0\tQ100617\tP1713\t\"http://webarchiv.bundestag.de/archive/2013/1212/bundestag/abgeordnete17/biografien/S/schwanitz_rolf.html\"\tnormal\turl\r\n", "Q100717883-P1713-bd827f-36a0ade3-0\tQ100717883\tP1713\t\"https://www.bundestag.de/abgeordnete/biografien/W/524570-524570\"\tnormal\turl\r\n", "Q100797-P1713-c9a2d4-f898748f-0\tQ100797\tP1713\t\"https://www.bundestag.de/abgeordnete/biografien/P/pronold_florian/522720\"\tnormal\turl\r\n", "Q100960-P1713-7a9695-4d24386e-0\tQ100960\tP1713\t\"http://webarchiv.bundestag.de/archive/2007/0206/mdb/mdb14/bio/G/geigemi0.html\"\tnormal\turl\r\n", "Q100986-P1713-09ed67-8a983157-0\tQ100986\tP1713\t\"http://www.bundestag.de/bundestag/abgeordnete18/biografien/B/becker_dirk/258158\"\tnormal\turl\r\n", "Q100986-P1713-0c450a-871bc22b-0\tQ100986\tP1713\t\"http://www.bundestag.de/bundestag/abgeordnete18/biografien/B/becker_dirk.html\"\tnormal\turl\r\n", "Q101192-P1713-712026-5576cf80-0\tQ101192\tP1713\t\"http://www.bundestag.de/bundestag/abgeordnete18/biografien/B/bartels_hans_peter.html\"\tnormal\turl\r\n", "Q101192-P1713-c1d1cb-90bf9b39-0\tQ101192\tP1713\t\"http://www.bundestag.de/bundestag/abgeordnete18/biografien/B/bartels_hans_peter/258234\"\tnormal\turl\r\n", "Q101206-P1713-f2dc25-f3b5f511-0\tQ101206\tP1713\t\"http://webarchiv.bundestag.de/archive/2013/1212/bundestag/abgeordnete17/biografien/S/schreiner_ottmar.html\"\tnormal\turl\r\n", "Q101296-P1713-5850b9-d4983cc9-0\tQ101296\tP1713\t\"http://webarchiv.bundestag.de/archive/2013/1212/bundestag/abgeordnete17/biografien/M/merkel_petra.html\"\tnormal\turl\r\n", "Q101481413-P1713-e36b80-6e010c1d-0\tQ101481413\tP1713\t\"https://www.bundestag.de/abgeordnete/biografien/D/518958-518958\"\tnormal\turl\r\n", "Q101504-P1713-2b19ec-bab3dc39-0\tQ101504\tP1713\t\"https://www.bundestag.de/abgeordnete/biografien/W/willsch_klaus_peter/524612\"\tnormal\turl\r\n", "Q101533533-P1713-bf5149-3cb91260-0\tQ101533533\tP1713\t\"https://www.bundestag.de/abgeordnete/biografien/N/522272-522272\"\tnormal\turl\r\n", "Q101617-P1713-d046b6-7e1394d5-0\tQ101617\tP1713\t\"https://www.bundestag.de/abgeordnete/biografien/B/bluhm_heidrun/518490\"\tnormal\turl\r\n", "Q101632-P1713-0a7355-6fa52c85-0\tQ101632\tP1713\t\"http://webarchiv.bundestag.de/archive/2010/0427/bundestag/abgeordnete/bio/A/albacpe0.html\"\tnormal\turl\r\n", "Q1016467-P1713-ff16af-475eaf2e-0\tQ1016467\tP1713\t\"https://www.bundestag.de/abgeordnete/biografien/L/lischka_burkhard/521668\"\tnormal\turl\r\n", "Q1016541-P1713-c620a6-bf28fbf5-0\tQ1016541\tP1713\t\"http://webarchiv.bundestag.de/archive/2013/1212/bundestag/abgeordnete17/biografien/M/mueller_soenksen_burkhardt.html\"\tnormal\turl\r\n", "Q101662-P1713-9e254f-1d37dd01-0\tQ101662\tP1713\t\"http://webarchiv.bundestag.de/archive/2013/1212/bundestag/abgeordnete17/biografien/H/hagemann_klaus.html\"\tnormal\turl\r\n", "Q101701-P1713-d2d742-b14a2f67-0\tQ101701\tP1713\t\"http://webarchiv.bundestag.de/archive/2007/0206/mdb/mdb15/bio/G/goennta0.html\"\tnormal\turl\r\n", "Q101703-P1713-86bfef-179d0c06-0\tQ101703\tP1713\t\"https://www.bundestag.de/abgeordnete/biografien/G/grotelueschen_astrid/519912\"\tnormal\turl\r\n", "Q1019016-P1713-707a97-4f43e3ab-0\tQ1019016\tP1713\t\"https://www.bundestag.de/abgeordnete/biografien/B/bas_baerbel/518186\"\tnormal\turl\r\n", "Q1019023-P1713-4b05d8-54e704ae-0\tQ1019023\tP1713\t\"http://webarchiv.bundestag.de/archive/2007/0206/mdb/mdb14/bio/G/grygiba0.html\"\tnormal\turl\r\n", "Q1019029-P1713-6f6eed-942d192f-0\tQ1019029\tP1713\t\"https://www.bundestag.de/abgeordnete/biografien/K/kofler_baerbel/521198\"\tnormal\turl\r\n", "Q1019050-P1713-3d5c18-83064a52-0\tQ1019050\tP1713\t\"http://webarchiv.bundestag.de/archive/2007/0206/mdb/mdb14/bio/S/sothmba0.html\"\tnormal\turl\r\n", "Q101967-P1713-b19158-a5b76799-0\tQ101967\tP1713\t\"http://webarchiv.bundestag.de/archive/2010/0427/bundestag/abgeordnete/bio/B/bruenmo0.html\"\tnormal\turl\r\n", "Q101987-P1713-e8184f-e2226a1f-0\tQ101987\tP1713\t\"http://webarchiv.bundestag.de/archive/2007/0206/mdb/mdb15/bio/L/lucygch0.html\"\tnormal\turl\r\n", "Q4681882-P1713-d878eb-9fff460e-0\tQ4681882\tP1713\t\"https://heightnetworth.com/adele-givens-net-worth-2020/\"\tnormal\turl\r\n" ] } ], "source": [ "!cat ../../allConstraintsAnalysis/codependencyConstraint/Mand/claims.P1713.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 78, "id": "imposed-bibliography", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P7959655743376[../../allConstraintsAnalysis/codependencyCons...0.000573
P3931225064[../../allConstraintsAnalysis/codependencyCons...0.028444
P5051163[../../allConstraintsAnalysis/codependencyCons...63.000000
P9152836445[../../allConstraintsAnalysis/codependencyCons...0.001587
P4511050236[../../allConstraintsAnalysis/codependencyCons...0.003428
P19716739234[../../allConstraintsAnalysis/codependencyCons...0.000203
P1560323520[../../allConstraintsAnalysis/codependencyCons...0.006182
P2009101915[../../allConstraintsAnalysis/codependencyCons...0.014720
P17313639[../../allConstraintsAnalysis/codependencyCons...0.024793
P1196880706[../../allConstraintsAnalysis/codependencyCons...0.000068
P236516505[../../allConstraintsAnalysis/codependencyCons...0.003030
P26798875[../../allConstraintsAnalysis/codependencyCons...0.005637
P9445774[../../allConstraintsAnalysis/codependencyCons...0.006932
P1411217094[../../allConstraintsAnalysis/codependencyCons...0.000033
P180940063[../../allConstraintsAnalysis/codependencyCons...0.000749
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P7959 655743 376 [../../allConstraintsAnalysis/codependencyCons... \n", "P3931 2250 64 [../../allConstraintsAnalysis/codependencyCons... \n", "P5051 1 63 [../../allConstraintsAnalysis/codependencyCons... \n", "P915 28364 45 [../../allConstraintsAnalysis/codependencyCons... \n", "P451 10502 36 [../../allConstraintsAnalysis/codependencyCons... \n", "P197 167392 34 [../../allConstraintsAnalysis/codependencyCons... \n", "P1560 3235 20 [../../allConstraintsAnalysis/codependencyCons... \n", "P2009 1019 15 [../../allConstraintsAnalysis/codependencyCons... \n", "P1731 363 9 [../../allConstraintsAnalysis/codependencyCons... \n", "P1196 88070 6 [../../allConstraintsAnalysis/codependencyCons... \n", "P2365 1650 5 [../../allConstraintsAnalysis/codependencyCons... \n", "P2679 887 5 [../../allConstraintsAnalysis/codependencyCons... \n", "P944 577 4 [../../allConstraintsAnalysis/codependencyCons... \n", "P141 121709 4 [../../allConstraintsAnalysis/codependencyCons... \n", "P1809 4006 3 [../../allConstraintsAnalysis/codependencyCons... \n", "\n", " violation_ratio \n", "P7959 0.000573 \n", "P3931 0.028444 \n", "P5051 63.000000 \n", "P915 0.001587 \n", "P451 0.003428 \n", "P197 0.000203 \n", "P1560 0.006182 \n", "P2009 0.014720 \n", "P1731 0.024793 \n", "P1196 0.000068 \n", "P2365 0.003030 \n", "P2679 0.005637 \n", "P944 0.006932 \n", "P141 0.000033 \n", "P1809 0.000749 " ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 79, "id": "emotional-crown", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 78.000000\n", "mean 0.812130\n", "std 7.132861\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.000000\n", "75% 0.000558\n", "max 63.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 80, "id": "certain-freeze", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 3 - Violation Ratios')" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF3['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 3 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 81, "id": "cooperative-ownership", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 3 - Violation Ratios <= 0.0005')" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF3[codepConstDF3['violation_ratio'] <= 0.0005].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 3 - Violation Ratios <= 0.0005\")" ] }, { "cell_type": "code", "execution_count": 82, "id": "studied-inclusion", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of properties whose violation ratio is greater than mean: 1/78\n" ] } ], "source": [ "print(f\"No. of properties whose violation ratio is greater than mean: {sum(codepConstDF3['violation_ratio'] >= 0.922928)}/{len(codepConstDF3)}\")" ] }, { "cell_type": "markdown", "id": "protective-brazil", "metadata": {}, "source": [ "#### Version 4 - Normal" ] }, { "cell_type": "code", "execution_count": 83, "id": "laughing-pressing", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF4 = pd.DataFrame(codepConstViolations['Normal']).T" ] }, { "cell_type": "code", "execution_count": 84, "id": "loving-swift", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P101815628[../../allConstraintsAnalysis/codependencyCons...
P1540236379456[../../allConstraintsAnalysis/codependencyCons...
P128313830[../../allConstraintsAnalysis/codependencyCons...
P18964779322[../../allConstraintsAnalysis/codependencyCons...
P17128567942307[../../allConstraintsAnalysis/codependencyCons...
............
P291601[../../allConstraintsAnalysis/codependencyCons...
P187339461[../../allConstraintsAnalysis/codependencyCons...
P292326872738[../../allConstraintsAnalysis/codependencyCons...
P187916950[../../allConstraintsAnalysis/codependencyCons...
P2929551181[../../allConstraintsAnalysis/codependencyCons...
\n", "

418 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P1018 156 28 [../../allConstraintsAnalysis/codependencyCons...\n", "P1540 236379 456 [../../allConstraintsAnalysis/codependencyCons...\n", "P1283 1383 0 [../../allConstraintsAnalysis/codependencyCons...\n", "P1896 4779 322 [../../allConstraintsAnalysis/codependencyCons...\n", "P171 2856794 2307 [../../allConstraintsAnalysis/codependencyCons...\n", "... ... ... ...\n", "P2916 0 1 [../../allConstraintsAnalysis/codependencyCons...\n", "P1873 394 61 [../../allConstraintsAnalysis/codependencyCons...\n", "P2923 2687 2738 [../../allConstraintsAnalysis/codependencyCons...\n", "P1879 1695 0 [../../allConstraintsAnalysis/codependencyCons...\n", "P2929 5511 81 [../../allConstraintsAnalysis/codependencyCons...\n", "\n", "[418 rows x 3 columns]" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4" ] }, { "cell_type": "code", "execution_count": 85, "id": "north-christian", "metadata": {}, "outputs": [], "source": [ "codepConstDF4['violation_ratio'] = codepConstDF4.apply(lambda p: p.incorrect / p.correct if p.correct != 0 else p.incorrect/100, axis=1)" ] }, { "cell_type": "code", "execution_count": 86, "id": "closing-causing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P1111046327[../../allConstraintsAnalysis/codependencyCons...463.270000
P1995809985[../../allConstraintsAnalysis/codependencyCons...124.812500
P76813315143[../../allConstraintsAnalysis/codependencyCons...113.857143
P450141922682[../../allConstraintsAnalysis/codependencyCons...54.133652
P27154158[../../allConstraintsAnalysis/codependencyCons...39.500000
P2755955123578[../../allConstraintsAnalysis/codependencyCons...20.751973
P2376119[../../allConstraintsAnalysis/codependencyCons...19.000000
P39122973705[../../allConstraintsAnalysis/codependencyCons...12.474747
P272012134[../../allConstraintsAnalysis/codependencyCons...11.166667
P2248402041566[../../allConstraintsAnalysis/codependencyCons...10.339801
P2325407140611[../../allConstraintsAnalysis/codependencyCons...9.975682
P2243402536540[../../allConstraintsAnalysis/codependencyCons...9.078261
P2244402736527[../../allConstraintsAnalysis/codependencyCons...9.070524
P34485474302[../../allConstraintsAnalysis/codependencyCons...7.864717
P770765445[../../allConstraintsAnalysis/codependencyCons...6.846154
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1111 0 46327 [../../allConstraintsAnalysis/codependencyCons... \n", "P1995 80 9985 [../../allConstraintsAnalysis/codependencyCons... \n", "P768 133 15143 [../../allConstraintsAnalysis/codependencyCons... \n", "P4501 419 22682 [../../allConstraintsAnalysis/codependencyCons... \n", "P2715 4 158 [../../allConstraintsAnalysis/codependencyCons... \n", "P275 5955 123578 [../../allConstraintsAnalysis/codependencyCons... \n", "P2376 1 19 [../../allConstraintsAnalysis/codependencyCons... \n", "P3912 297 3705 [../../allConstraintsAnalysis/codependencyCons... \n", "P2720 12 134 [../../allConstraintsAnalysis/codependencyCons... \n", "P2248 4020 41566 [../../allConstraintsAnalysis/codependencyCons... \n", "P2325 4071 40611 [../../allConstraintsAnalysis/codependencyCons... \n", "P2243 4025 36540 [../../allConstraintsAnalysis/codependencyCons... \n", "P2244 4027 36527 [../../allConstraintsAnalysis/codependencyCons... \n", "P3448 547 4302 [../../allConstraintsAnalysis/codependencyCons... \n", "P7707 65 445 [../../allConstraintsAnalysis/codependencyCons... \n", "\n", " violation_ratio \n", "P1111 463.270000 \n", "P1995 124.812500 \n", "P768 113.857143 \n", "P4501 54.133652 \n", "P2715 39.500000 \n", "P275 20.751973 \n", "P2376 19.000000 \n", "P3912 12.474747 \n", "P2720 11.166667 \n", "P2248 10.339801 \n", "P2325 9.975682 \n", "P2243 9.078261 \n", "P2244 9.070524 \n", "P3448 7.864717 \n", "P7707 6.846154 " ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 87, "id": "weighted-input", "metadata": {}, "outputs": [], "source": [ "# list(codepConstDF4.sort_values(by=['violation_ratio'],ascending=False).head(5).paths)" ] }, { "cell_type": "code", "execution_count": 88, "id": "brief-effect", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P22142962988711699[../../allConstraintsAnalysis/codependencyCons...0.240196
P43331028893435483[../../allConstraintsAnalysis/codependencyCons...0.014035
P2755955123578[../../allConstraintsAnalysis/codependencyCons...20.751973
P2860174402886114713[../../allConstraintsAnalysis/codependencyCons...0.000658
P1435189387479479[../../allConstraintsAnalysis/codependencyCons...0.041966
P7084525354258[../../allConstraintsAnalysis/codependencyCons...1.198992
P19711752349903[../../allConstraintsAnalysis/codependencyCons...0.424623
P15983697846481[../../allConstraintsAnalysis/codependencyCons...1.256991
P1111046327[../../allConstraintsAnalysis/codependencyCons...463.270000
P2248402041566[../../allConstraintsAnalysis/codependencyCons...10.339801
P2325407140611[../../allConstraintsAnalysis/codependencyCons...9.975682
P856123929238026[../../allConstraintsAnalysis/codependencyCons...0.030684
P2243402536540[../../allConstraintsAnalysis/codependencyCons...9.078261
P2244402736527[../../allConstraintsAnalysis/codependencyCons...9.070524
P41335779333607[../../allConstraintsAnalysis/codependencyCons...0.093929
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2214 2962988 711699 [../../allConstraintsAnalysis/codependencyCons... \n", "P433 31028893 435483 [../../allConstraintsAnalysis/codependencyCons... \n", "P275 5955 123578 [../../allConstraintsAnalysis/codependencyCons... \n", "P2860 174402886 114713 [../../allConstraintsAnalysis/codependencyCons... \n", "P1435 1893874 79479 [../../allConstraintsAnalysis/codependencyCons... \n", "P708 45253 54258 [../../allConstraintsAnalysis/codependencyCons... \n", "P197 117523 49903 [../../allConstraintsAnalysis/codependencyCons... \n", "P1598 36978 46481 [../../allConstraintsAnalysis/codependencyCons... \n", "P1111 0 46327 [../../allConstraintsAnalysis/codependencyCons... \n", "P2248 4020 41566 [../../allConstraintsAnalysis/codependencyCons... \n", "P2325 4071 40611 [../../allConstraintsAnalysis/codependencyCons... \n", "P856 1239292 38026 [../../allConstraintsAnalysis/codependencyCons... \n", "P2243 4025 36540 [../../allConstraintsAnalysis/codependencyCons... \n", "P2244 4027 36527 [../../allConstraintsAnalysis/codependencyCons... \n", "P413 357793 33607 [../../allConstraintsAnalysis/codependencyCons... \n", "\n", " violation_ratio \n", "P2214 0.240196 \n", "P433 0.014035 \n", "P275 20.751973 \n", "P2860 0.000658 \n", "P1435 0.041966 \n", "P708 1.198992 \n", "P197 0.424623 \n", "P1598 1.256991 \n", "P1111 463.270000 \n", "P2248 10.339801 \n", "P2325 9.975682 \n", "P856 0.030684 \n", "P2243 9.078261 \n", "P2244 9.070524 \n", "P413 0.093929 " ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 89, "id": "wireless-passenger", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 418.000000\n", "mean 2.448558\n", "std 24.334208\n", "min 0.000000\n", "25% 0.001391\n", "50% 0.022781\n", "75% 0.220654\n", "max 463.270000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 90, "id": "civilian-arnold", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 4 - Violation Ratios')" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF4['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 4 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 91, "id": "threaded-cooler", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 4 - Violation Ratios <= 0.5')" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF4[codepConstDF4['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 4 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": 92, "id": "olympic-charlotte", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of properties whose violation ratio is greater than mean: 25/418\n" ] } ], "source": [ "print(f\"No. of properties whose violation ratio is greater than mean: {sum(codepConstDF4['violation_ratio'] >= 2.414703)}/{len(codepConstDF4)}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "needed-multimedia", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "published-affiliate", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": null, "id": "aggregate-conservative", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "from tqdm.notebook import tqdm\n", "\n", "codepConstViolations = {}\n", "\n", "codepConstViolations = {}\n", "codepConstPropList = set()\n", "\n", "def extractTimes(filename):\n", " times = []\n", " with open(filename) as f:\n", " for line in f:\n", " if \"real\" in line:\n", " line = line.strip()\n", " time1 = line.split(\"\\t\")[1]\n", " mins, sec = time1.split(\"m\")\n", " mins = int(mins)\n", " sec = float(sec[:-1])\n", " times.append(60 * mins + sec)\n", " return times\n", "\n", "# codepConstViolationsSummary = {}\n", "times = []\n", "timesVersion = {\"MSN\": [], \"MN\": [], \"M\": [], \"N\": [], \"S\": []}\n", "filePath = '/data/wd-correctness/propertiesSplit/checkViolations/exec_logs/'\n", "for filename in tqdm(os.listdir(filePath)):\n", " if filename.startswith(\"timeLog_codepConst_\"):\n", " ver = filename.split('_')[2]\n", " tempTimes = extractTimes(filePath + filename)\n", " times += tempTimes\n", " timesVersion[ver] += tempTimes\n", "print(pd.Series(times).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "hearing-treasury", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['MSN']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "animal-vocabulary", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['MN']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "gentle-accessory", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['M']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "fresh-namibia", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['N']).describe())" ] }, { "cell_type": "markdown", "id": "industrial-parcel", "metadata": {}, "source": [ "## Symmetric Constraint (Q21510862)\n", "\n", "This constraint says, if node1 has a property with this constraint, then both `(node1)-[prop]->(node2)` and `(node2)-[prop]->(node1)` must be present with few exceptions" ] }, { "cell_type": "markdown", "id": "silent-fundamentals", "metadata": {}, "source": [ "### Constraints File" ] }, { "cell_type": "code", "execution_count": 1, "id": "known-wednesday", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-01 11:07:06 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " AND graph_1_c1.\"node2\"=?\r\n", " PARAS: ['P2302', 'Q21510862']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->(:Q21510862)\" \\\n", " -o ../../constraintsOP/symmetricConstraint/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 2, "id": "legal-diamond", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "qualiDF = pd.read_csv(\"../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz\",sep='\\t')\n", "constDF = pd.read_csv(\"../../constraintsOP/symmetricConstraint/claims.constraints_list.tsv\",sep='\\t')" ] }, { "cell_type": "code", "execution_count": 3, "id": "exceptional-morris", "metadata": {}, "outputs": [], "source": [ "constDF2 = constDF.set_index('id').join(qualiDF.set_index('node1'),rsuffix='_qualifier').drop(columns=['id', 'node2;wikidatatype_qualifier', 'rank', 'node2', 'label', 'node2;wikidatatype']).rename(columns={'label_qualifier':'label', 'node2_qualifier': 'node2'})\n", "constDF2 = constDF2.reset_index()\n", "constDF2 = constDF2.rename(columns={'index':'id'})\n", "constDF2['label'] = constDF2.label.fillna(\"P2316\")\n", "constDF2['node2'] = constDF2.node2.fillna(\"Normal\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "burning-involvement", "metadata": {}, "outputs": [], "source": [ "constDF2.to_csv(\"../../constraintsOP/symmetricConstraint/claims.constraints_all.tsv\",sep=\"\\t\",index=False)" ] }, { "cell_type": "code", "execution_count": 5, "id": "naval-identification", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/symmetricConstraint/claims.constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 6, "id": "considered-madison", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 7, "id": "alone-cattle", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2316', 'P2303'], dtype=object)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 8, "id": "mighty-ordinary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2316 42\n", "P2303 3\n", "Name: label, dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 9, "id": "sensitive-alliance", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 10, "id": "tender-valley", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2316
node1id
P1322P1322-P2302-Q21510862-85dea891-0NaN[Normal]
P1327P1327-P2302-Q21510862-a3c3a094-0NaN[Normal]
P1382P1382-P2302-Q21510862-f6bcfecf-0NaN[Normal]
P1560P1560-P2302-Q21510862-fabecaeb-0NaN[Q21502408]
P1639P1639-P2302-Q21510862-384edcd4-0NaN[Q21502408]
\n", "
" ], "text/plain": [ "label P2303 P2316\n", "node1 id \n", "P1322 P1322-P2302-Q21510862-85dea891-0 NaN [Normal]\n", "P1327 P1327-P2302-Q21510862-a3c3a094-0 NaN [Normal]\n", "P1382 P1382-P2302-Q21510862-f6bcfecf-0 NaN [Normal]\n", "P1560 P1560-P2302-Q21510862-fabecaeb-0 NaN [Q21502408]\n", "P1639 P1639-P2302-Q21510862-384edcd4-0 NaN [Q21502408]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 11, "id": "cellular-canal", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 12, "id": "desperate-poster", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2316
node1
P1322NaN[Normal]
P1327NaN[Normal]
P1382NaN[Normal]
P1560NaN[Q21502408]
P1639NaN[Q21502408]
\n", "
" ], "text/plain": [ "label P2303 P2316\n", "node1 \n", "P1322 NaN [Normal]\n", "P1327 NaN [Normal]\n", "P1382 NaN [Normal]\n", "P1560 NaN [Q21502408]\n", "P1639 NaN [Q21502408]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "primary-netherlands", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 14, "id": "pointed-haven", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "354610f2497449c79eb9bec3e2c76294", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tqdm.notebook import tqdm\n", "import os.path\n", "import os\n", "\n", "cnt = 0\n", "fCnt = 0\n", "\n", "folderName = 'symmetricConstraint'\n", "shellFileSuffix = 'symmConst_Validator_'\n", "graph_cache_prefix = 'symm_03'\n", "\n", "for row in tqdm(dfItemRequires.iterrows()):\n", "# try:\n", " prop = row[0]\n", " constraint = row[1]\n", " mandatory = []\n", " suggestion = []\n", " normal = []\n", " prop = str(prop)\n", " \n", " if type(constraint['P2316']) == list:\n", " if constraint['P2316'][0] == 'Q21502408':\n", " sfname = 'mandatory'\n", " elif constraint['P2316'][0] == 'Q62026391':\n", " sfname = 'suggestion'\n", " elif constraint['P2316'][0] == 'Normal':\n", " sfname = 'normal'\n", " else:\n", " sfname = 'normal'\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop +\".tsv\")):\n", " continue\n", " \n", " command = \"{ time ( kgtk --debug query -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " ../../propertiesSplit/claims.\"+ prop +\".copy2.tsv \\\n", " --match 'tsv: (node1)-[nodeProp]->(node2), copy2: (node2)-[]->(node1)' \"\n", " \n", " os.system(\"cp ../../propertiesSplit/claims.\"+ prop +\".tsv ../../propertiesSplit/claims.\"+ prop +\".copy2.tsv\")\n", " \n", " if cnt % 20 == 0:\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " command\n", " if type(constraint['P2303']) != list: # Exceptions not present\n", " commandRest = \" --return 'nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " else:\n", " excptns = constraint['P2303']\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".correct_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".correct_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)" ] }, { "cell_type": "code", "execution_count": 15, "id": "polar-canada", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "38" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 16, "id": "virtual-disney", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,3):\n", "# os.system(\"screen -dm sh ../../propertiesSplit/checkViolations/symmConst_Validator_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "coral-cheese", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 2, "id": "governmental-backup", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d19bfcb280e649a996395694bd18bb6c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7f12ce1ceba5485d96a83c3fe719d485", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/13 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2152751[../../allConstraintsAnalysis/symmetricConstra...0.013158
P1639205920[../../allConstraintsAnalysis/symmetricConstra...0.009620
P1560323913[../../allConstraintsAnalysis/symmetricConstra...0.003998
P61852800[../../allConstraintsAnalysis/symmetricConstra...0.000000
P336417860[../../allConstraintsAnalysis/symmetricConstra...0.000000
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P2152 75 1 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1639 2059 20 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1560 3239 13 [../../allConstraintsAnalysis/symmetricConstra... \n", "P6185 280 0 [../../allConstraintsAnalysis/symmetricConstra... \n", "P3364 1786 0 [../../allConstraintsAnalysis/symmetricConstra... \n", "\n", " violation_ratio \n", "P2152 0.013158 \n", "P1639 0.009620 \n", "P1560 0.003998 \n", "P6185 0.000000 \n", "P3364 0.000000 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2152751[../../allConstraintsAnalysis/symmetricConstra...0.013158
P1639205920[../../allConstraintsAnalysis/symmetricConstra...0.009620
P1560323913[../../allConstraintsAnalysis/symmetricConstra...0.003998
P61852800[../../allConstraintsAnalysis/symmetricConstra...0.000000
P336417860[../../allConstraintsAnalysis/symmetricConstra...0.000000
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2152 75 1 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1639 2059 20 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1560 3239 13 [../../allConstraintsAnalysis/symmetricConstra... \n", "P6185 280 0 [../../allConstraintsAnalysis/symmetricConstra... \n", "P3364 1786 0 [../../allConstraintsAnalysis/symmetricConstra... \n", "\n", " violation_ratio \n", "P2152 0.013158 \n", "P1639 0.009620 \n", "P1560 0.003998 \n", "P6185 0.000000 \n", "P3364 0.000000 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF1 = pd.DataFrame(symmConstViolations['mandatory']).T\n", "symmConstDF1['violation_ratio'] = symmConstDF1.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF1.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 8, "id": "gross-extraction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P27891038926058[../../allConstraintsAnalysis/symmetricConstra...0.055098
P188950595423764[../../allConstraintsAnalysis/symmetricConstra...0.044862
P1971730231773[../../allConstraintsAnalysis/symmetricConstra...0.010143
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2789 103892 6058 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1889 505954 23764 [../../allConstraintsAnalysis/symmetricConstra... \n", "P197 173023 1773 [../../allConstraintsAnalysis/symmetricConstra... \n", "\n", " violation_ratio \n", "P2789 0.055098 \n", "P1889 0.044862 \n", "P197 0.010143 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P27891038926058[../../allConstraintsAnalysis/symmetricConstra...0.055098
P188950595423764[../../allConstraintsAnalysis/symmetricConstra...0.044862
P1971730231773[../../allConstraintsAnalysis/symmetricConstra...0.010143
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2789 103892 6058 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1889 505954 23764 [../../allConstraintsAnalysis/symmetricConstra... \n", "P197 173023 1773 [../../allConstraintsAnalysis/symmetricConstra... \n", "\n", " violation_ratio \n", "P2789 0.055098 \n", "P1889 0.044862 \n", "P197 0.010143 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF2 = pd.DataFrame(symmConstViolations['suggestion']).T\n", "symmConstDF2['violation_ratio'] = symmConstDF2.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 9, "id": "heavy-scout", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P518802[../../allConstraintsAnalysis/symmetricConstra...1.000000
P1706449[../../allConstraintsAnalysis/symmetricConstra...0.924528
P2652460777[../../allConstraintsAnalysis/symmetricConstra...0.628133
P521418144[../../allConstraintsAnalysis/symmetricConstra...0.256228
P229399841919[../../allConstraintsAnalysis/symmetricConstra...0.161220
P30321674320[../../allConstraintsAnalysis/symmetricConstra...0.160481
P1382108071587[../../allConstraintsAnalysis/symmetricConstra...0.128046
P13277754638[../../allConstraintsAnalysis/symmetricConstra...0.076025
P4519549749[../../allConstraintsAnalysis/symmetricConstra...0.072733
P34032130110[../../allConstraintsAnalysis/symmetricConstra...0.049107
P4602389808022[../../allConstraintsAnalysis/symmetricConstra...0.032477
P514301[../../allConstraintsAnalysis/symmetricConstra...0.032258
P5306595213[../../allConstraintsAnalysis/symmetricConstra...0.031287
P46115535474[../../allConstraintsAnalysis/symmetricConstra...0.029608
P4754825113705[../../allConstraintsAnalysis/symmetricConstra...0.024388
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5188 0 2 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1706 4 49 [../../allConstraintsAnalysis/symmetricConstra... \n", "P2652 460 777 [../../allConstraintsAnalysis/symmetricConstra... \n", "P521 418 144 [../../allConstraintsAnalysis/symmetricConstra... \n", "P2293 9984 1919 [../../allConstraintsAnalysis/symmetricConstra... \n", "P3032 1674 320 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1382 10807 1587 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1327 7754 638 [../../allConstraintsAnalysis/symmetricConstra... \n", "P451 9549 749 [../../allConstraintsAnalysis/symmetricConstra... \n", "P3403 2130 110 [../../allConstraintsAnalysis/symmetricConstra... \n", "P460 238980 8022 [../../allConstraintsAnalysis/symmetricConstra... \n", "P514 30 1 [../../allConstraintsAnalysis/symmetricConstra... \n", "P530 6595 213 [../../allConstraintsAnalysis/symmetricConstra... \n", "P461 15535 474 [../../allConstraintsAnalysis/symmetricConstra... \n", "P47 548251 13705 [../../allConstraintsAnalysis/symmetricConstra... \n", "\n", " violation_ratio \n", "P5188 1.000000 \n", "P1706 0.924528 \n", "P2652 0.628133 \n", "P521 0.256228 \n", "P2293 0.161220 \n", "P3032 0.160481 \n", "P1382 0.128046 \n", "P1327 0.076025 \n", "P451 0.072733 \n", "P3403 0.049107 \n", "P460 0.032477 \n", "P514 0.032258 \n", "P530 0.031287 \n", "P461 0.029608 \n", "P47 0.024388 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P518802[../../allConstraintsAnalysis/symmetricConstra...1.000000
P1706449[../../allConstraintsAnalysis/symmetricConstra...0.924528
P2652460777[../../allConstraintsAnalysis/symmetricConstra...0.628133
P521418144[../../allConstraintsAnalysis/symmetricConstra...0.256228
P229399841919[../../allConstraintsAnalysis/symmetricConstra...0.161220
P30321674320[../../allConstraintsAnalysis/symmetricConstra...0.160481
P1382108071587[../../allConstraintsAnalysis/symmetricConstra...0.128046
P13277754638[../../allConstraintsAnalysis/symmetricConstra...0.076025
P4519549749[../../allConstraintsAnalysis/symmetricConstra...0.072733
P34032130110[../../allConstraintsAnalysis/symmetricConstra...0.049107
P4602389808022[../../allConstraintsAnalysis/symmetricConstra...0.032477
P514301[../../allConstraintsAnalysis/symmetricConstra...0.032258
P5306595213[../../allConstraintsAnalysis/symmetricConstra...0.031287
P46115535474[../../allConstraintsAnalysis/symmetricConstra...0.029608
P4754825113705[../../allConstraintsAnalysis/symmetricConstra...0.024388
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5188 0 2 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1706 4 49 [../../allConstraintsAnalysis/symmetricConstra... \n", "P2652 460 777 [../../allConstraintsAnalysis/symmetricConstra... \n", "P521 418 144 [../../allConstraintsAnalysis/symmetricConstra... \n", "P2293 9984 1919 [../../allConstraintsAnalysis/symmetricConstra... \n", "P3032 1674 320 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1382 10807 1587 [../../allConstraintsAnalysis/symmetricConstra... \n", "P1327 7754 638 [../../allConstraintsAnalysis/symmetricConstra... \n", "P451 9549 749 [../../allConstraintsAnalysis/symmetricConstra... \n", "P3403 2130 110 [../../allConstraintsAnalysis/symmetricConstra... \n", "P460 238980 8022 [../../allConstraintsAnalysis/symmetricConstra... \n", "P514 30 1 [../../allConstraintsAnalysis/symmetricConstra... \n", "P530 6595 213 [../../allConstraintsAnalysis/symmetricConstra... \n", "P461 15535 474 [../../allConstraintsAnalysis/symmetricConstra... \n", "P47 548251 13705 [../../allConstraintsAnalysis/symmetricConstra... \n", "\n", " violation_ratio \n", "P5188 1.000000 \n", "P1706 0.924528 \n", "P2652 0.628133 \n", "P521 0.256228 \n", "P2293 0.161220 \n", "P3032 0.160481 \n", "P1382 0.128046 \n", "P1327 0.076025 \n", "P451 0.072733 \n", "P3403 0.049107 \n", "P460 0.032477 \n", "P514 0.032258 \n", "P530 0.031287 \n", "P461 0.029608 \n", "P47 0.024388 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF3 = pd.DataFrame(symmConstViolations['normal']).T\n", "symmConstDF3['violation_ratio'] = symmConstDF3.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 10, "id": "sexual-blowing", "metadata": {}, "outputs": [], "source": [ "# !head ../../allConstraintsAnalysis/symmetricConstraint/normal/claims.P3032.incorrect.tsv\n", "\n" ] }, { "cell_type": "code", "execution_count": 11, "id": "legitimate-aspect", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Symmetric Normal Constraint - Violation Ratios')" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "Text(0.5, 1.0, 'Symmetric Normal Constraint - Violation Ratios')" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "symmConstDF3.sort_values(by=['violation_ratio'],ascending=False)['violation_ratio'].plot.hist(bins=100).set_title(\"Symmetric Normal Constraint - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "premium-yahoo", "metadata": {}, "outputs": [], "source": [ "pd.concat([symmConstDF1, symmConstDF2, symmConstDF3]).to_csv('../../allConstraintsAnalysis/symmConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "unlikely-sewing", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 11, "id": "southern-reasoning", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "01675fcd83284c8ab2aa683f43fef458", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/108 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for symmetric constraint checks\")" ] }, { "cell_type": "markdown", "id": "informed-animal", "metadata": {}, "source": [ "## Inverse Constraint (Q21510855)\n", "\n", "This constraint says, if node1 has a property with this constraint, then both `(node1)-[prop]->(node2)` and `(node2)-[prop]->(node1)` must be present with few exceptions" ] }, { "cell_type": "markdown", "id": "dramatic-manchester", "metadata": {}, "source": [ "### Constraints File" ] }, { "cell_type": "code", "execution_count": 1, "id": "leading-server", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-11 11:02:04 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " AND graph_1_c1.\"node2\"=?\r\n", " PARAS: ['P2302', 'Q21510855']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->(:Q21510855)\" \\\n", " -o ../../constraintsOP/inverseConstraint/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 2, "id": "offshore-sudan", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "P1026-P2302-Q21510855-adc83b86-0\tP1026\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1029-P2302-Q21510855-6b55e057-0\tP1029\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P115-P2302-Q21510855-f7aa0b78-0\tP115\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1151-P2302-Q21510855-0d9aa9c6-0\tP1151\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1204-P2302-Q21510855-e3d53bb6-0\tP1204\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1283-P2302-Q21510855-0e7699bb-0\tP1283\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1308-P2302-Q21510855-2aba96b7-0\tP1308\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1365-P2302-Q21510855-c809b758-0\tP1365\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1366-P2302-Q21510855-eee12ef8-0\tP1366\tP2302\tQ21510855\tnormal\twikibase-item\r\n" ] } ], "source": [ "!head ../../constraintsOP/inverseConstraint/claims.constraints_list.tsv" ] }, { "cell_type": "code", "execution_count": 3, "id": "received-colonial", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "qualiDF = pd.read_csv(\"../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz\",sep='\\t')\n", "constDF = pd.read_csv(\"../../constraintsOP/inverseConstraint/claims.constraints_list.tsv\",sep='\\t')" ] }, { "cell_type": "code", "execution_count": 4, "id": "overall-expense", "metadata": {}, "outputs": [], "source": [ "constDF2 = constDF.set_index('id').join(qualiDF.set_index('node1'),rsuffix='_qualifier').drop(columns=['id', 'node2;wikidatatype_qualifier', 'rank', 'node2', 'label', 'node2;wikidatatype']).rename(columns={'label_qualifier':'label', 'node2_qualifier': 'node2'})\n", "constDF2 = constDF2.reset_index()\n", "constDF2 = constDF2.rename(columns={'index':'id'})\n", "constDF2['label'] = constDF2.label.fillna(\"P2316\")\n", "constDF2['node2'] = constDF2.node2.fillna(\"Normal\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "valid-throat", "metadata": {}, "outputs": [], "source": [ "constDF2.to_csv(\"../../constraintsOP/inverseConstraint/claims.constraints_all.tsv\",sep=\"\\t\",index=False)" ] }, { "cell_type": "code", "execution_count": 6, "id": "focused-pennsylvania", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/inverseConstraint/claims.constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 7, "id": "moved-rental", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 8, "id": "attached-rings", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2306', 'P2316', 'P4155', 'P2303'], dtype=object)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 9, "id": "loving-mileage", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2306 110\n", "P2316 10\n", "P2303 2\n", "P4155 1\n", "Name: label, dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 10, "id": "local-forty", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 11, "id": "pressed-upset", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2306P2316P4155
node1id
P1026P1026-P2302-Q21510855-adc83b86-0NaN[P50]NaNNaN
P1029P1029-P2302-Q21510855-6b55e057-0NaN[P5096]NaNNaN
P115P115-P2302-Q21510855-f7aa0b78-0NaN[P466]NaNNaN
P1151P1151-P2302-Q21510855-0d9aa9c6-0NaN[P1204][Q21502408]NaN
P1204P1204-P2302-Q21510855-e3d53bb6-0NaN[P1151]NaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2306 P2316 P4155\n", "node1 id \n", "P1026 P1026-P2302-Q21510855-adc83b86-0 NaN [P50] NaN NaN\n", "P1029 P1029-P2302-Q21510855-6b55e057-0 NaN [P5096] NaN NaN\n", "P115 P115-P2302-Q21510855-f7aa0b78-0 NaN [P466] NaN NaN\n", "P1151 P1151-P2302-Q21510855-0d9aa9c6-0 NaN [P1204] [Q21502408] NaN\n", "P1204 P1204-P2302-Q21510855-e3d53bb6-0 NaN [P1151] NaN NaN" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 12, "id": "extra-stomach", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 13, "id": "seeing-marine", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2306P2316P4155
node1
P1026NaN[P50]NaNNaN
P1029NaN[P5096]NaNNaN
P115NaN[P466]NaNNaN
P1151NaN[P1204][Q21502408]NaN
P1204NaN[P1151]NaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2306 P2316 P4155\n", "node1 \n", "P1026 NaN [P50] NaN NaN\n", "P1029 NaN [P5096] NaN NaN\n", "P115 NaN [P466] NaN NaN\n", "P1151 NaN [P1204] [Q21502408] NaN\n", "P1204 NaN [P1151] NaN NaN" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "composite-cutting", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 48, "id": "acoustic-belarus", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0d0055aba376447a853b9ca80241247a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tqdm.notebook import tqdm\n", "import os.path\n", "import os\n", "\n", "cnt = 0\n", "fCnt = 0\n", "fOP = None\n", "\n", "folderName = 'inverseConstraint_Final'\n", "shellFileSuffix = 'invConst_Validator_new3_'\n", "graph_cache_file_prefix = \"inv_2_\"\n", "\n", "for prop, constraint in tqdm(dfItemRequires.iterrows()):\n", "# try:\n", " \n", " if type(constraint['P2316']) == list:\n", " if constraint['P2316'][0] == 'Q21502408':\n", " subFolderName = \"mandatory\"\n", " elif constraint['P2316'][0] == 'Q62026391':\n", " subFolderName = \"suggestion\"\n", " else:\n", " subFolderName = \"normal\"\n", " \n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop +\".tsv\")):\n", " continue\n", " \n", " prop2 = constraint['P2306']\n", "\n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", "\n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplit/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " if cnt % 20 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplit/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " \n", " command = \"{ time ( kgtk --debug query -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " ../../propertiesSplit/claims.\"+ prop2 +\".tsv \\\n", " --match '\"+ \\\n", " f\"{prop}: (node1)-[nodeProp]->(node2), {prop2}: (node2)-[]->(node1)' \"\n", "\n", " if type(constraint['P2303']) != list: # Exceptions not present\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " else:\n", " excptns = set(constraint['P2303'])\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplit/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", "# print(command) \n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysis/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplit/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 49, "id": "large-climb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "110" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 50, "id": "involved-vietnamese", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,7):\n", "# os.system(\"screen -dm sh ../../propertiesSplit/checkViolations/invConst_Validator_new3_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "retired-audio", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 1, "id": "specified-evanescence", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a91df974ff5549c186e5dbb8175b77f0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5ad67f2455284261bcb87f9ffdc2914c", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/12 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P267381968[../../allConstraintsAnalysis/inverseConstrain...0.076663
P41472719[../../allConstraintsAnalysis/inverseConstrain...0.032143
P41492724[../../allConstraintsAnalysis/inverseConstrain...0.014493
P2033183626[../../allConstraintsAnalysis/inverseConstrain...0.013963
P450175416[../../allConstraintsAnalysis/inverseConstrain...0.009040
P115116124[../../allConstraintsAnalysis/inverseConstrain...0.002475
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P2673 819 68 [../../allConstraintsAnalysis/inverseConstrain... \n", "P4147 271 9 [../../allConstraintsAnalysis/inverseConstrain... \n", "P4149 272 4 [../../allConstraintsAnalysis/inverseConstrain... \n", "P2033 1836 26 [../../allConstraintsAnalysis/inverseConstrain... \n", "P450 1754 16 [../../allConstraintsAnalysis/inverseConstrain... \n", "P1151 1612 4 [../../allConstraintsAnalysis/inverseConstrain... \n", "\n", " violation_ratio \n", "P2673 0.076663 \n", "P4147 0.032143 \n", "P4149 0.014493 \n", "P2033 0.013963 \n", "P450 0.009040 \n", "P1151 0.002475 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF1 = pd.DataFrame(invConstViolations['mandatory']).T\n", "invConstDF1['violation_ratio'] = invConstDF1.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF1.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 7, "id": "valid-symposium", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P143435124723[../../allConstraintsAnalysis/inverseConstrain...0.573528
P15596320548956[../../allConstraintsAnalysis/inverseConstrain...0.048368
P15696318339925[../../allConstraintsAnalysis/inverseConstrain...0.039801
P62972202131[../../allConstraintsAnalysis/inverseConstrain...0.001811
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1434 3512 4723 [../../allConstraintsAnalysis/inverseConstrain... \n", "P155 963205 48956 [../../allConstraintsAnalysis/inverseConstrain... \n", "P156 963183 39925 [../../allConstraintsAnalysis/inverseConstrain... \n", "P629 72202 131 [../../allConstraintsAnalysis/inverseConstrain... \n", "\n", " violation_ratio \n", "P1434 0.573528 \n", "P155 0.048368 \n", "P156 0.039801 \n", "P629 0.001811 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF2 = pd.DataFrame(invConstViolations['suggestion']).T\n", "invConstDF2['violation_ratio'] = invConstDF2.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 8, "id": "resident-mustang", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P160512188[../../allConstraintsAnalysis/inverseConstrain...0.940000
P34485834249[../../allConstraintsAnalysis/inverseConstrain...0.879346
P92615[../../allConstraintsAnalysis/inverseConstrain...0.833333
P92515[../../allConstraintsAnalysis/inverseConstrain...0.833333
P10294752037[../../allConstraintsAnalysis/inverseConstrain...0.810908
P115671224290[../../allConstraintsAnalysis/inverseConstrain...0.783498
P8625717[../../allConstraintsAnalysis/inverseConstrain...0.708333
P51328189[../../allConstraintsAnalysis/inverseConstrain...0.523529
P42525121956[../../allConstraintsAnalysis/inverseConstrain...0.437780
P38161410[../../allConstraintsAnalysis/inverseConstrain...0.416667
P167764[../../allConstraintsAnalysis/inverseConstrain...0.400000
P2512210140[../../allConstraintsAnalysis/inverseConstrain...0.400000
P2578989527[../../allConstraintsAnalysis/inverseConstrain...0.347625
P3261232122[../../allConstraintsAnalysis/inverseConstrain...0.344633
P5681049445[../../allConstraintsAnalysis/inverseConstrain...0.297858
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1605 12 188 [../../allConstraintsAnalysis/inverseConstrain... \n", "P3448 583 4249 [../../allConstraintsAnalysis/inverseConstrain... \n", "P926 1 5 [../../allConstraintsAnalysis/inverseConstrain... \n", "P925 1 5 [../../allConstraintsAnalysis/inverseConstrain... \n", "P1029 475 2037 [../../allConstraintsAnalysis/inverseConstrain... \n", "P115 6712 24290 [../../allConstraintsAnalysis/inverseConstrain... \n", "P8625 7 17 [../../allConstraintsAnalysis/inverseConstrain... \n", "P5132 81 89 [../../allConstraintsAnalysis/inverseConstrain... \n", "P425 2512 1956 [../../allConstraintsAnalysis/inverseConstrain... \n", "P3816 14 10 [../../allConstraintsAnalysis/inverseConstrain... \n", "P1677 6 4 [../../allConstraintsAnalysis/inverseConstrain... \n", "P2512 210 140 [../../allConstraintsAnalysis/inverseConstrain... \n", "P2578 989 527 [../../allConstraintsAnalysis/inverseConstrain... \n", "P3261 232 122 [../../allConstraintsAnalysis/inverseConstrain... \n", "P568 1049 445 [../../allConstraintsAnalysis/inverseConstrain... \n", "\n", " violation_ratio \n", "P1605 0.940000 \n", "P3448 0.879346 \n", "P926 0.833333 \n", "P925 0.833333 \n", "P1029 0.810908 \n", "P115 0.783498 \n", "P8625 0.708333 \n", "P5132 0.523529 \n", "P425 0.437780 \n", "P3816 0.416667 \n", "P1677 0.400000 \n", "P2512 0.400000 \n", "P2578 0.347625 \n", "P3261 0.344633 \n", "P568 0.297858 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF3 = pd.DataFrame(invConstViolations['normal']).T\n", "invConstDF3['violation_ratio'] = invConstDF3.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 9, "id": "dietary-venue", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "Q1133903-P925-Q18707-eae3a579-0\tQ1133903\tP925\tQ18707\tnormal\twikibase-item\r\n", "Q1570272-P925-Q7135001-5dbdce8c-0\tQ1570272\tP925\tQ7135001\tnormal\twikibase-item\r\n", "Q301613-P925-Q7135001-528c29b2-0\tQ301613\tP925\tQ7135001\tnormal\twikibase-item\r\n", "Q452595-P925-Q864951-c5b34e11-0\tQ452595\tP925\tQ864951\tnormal\twikibase-item\r\n", "Q5064084-P925-Q4117017-76545a06-0\tQ5064084\tP925\tQ4117017\tnormal\twikibase-item\r\n" ] } ], "source": [ "!head ../../allConstraintsAnalysis/inverseConstraint/normal/claims.P925.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 10, "id": "entire-gauge", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Symmetric Normal Constraint - Violation Ratios')" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "invConstDF3.sort_values(by=['violation_ratio'],ascending=False)['violation_ratio'].plot.hist(bins=100).set_title(\"Symmetric Normal Constraint - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 11, "id": "located-water", "metadata": {}, "outputs": [], "source": [ "pd.concat([invConstDF1, invConstDF2, invConstDF3]).to_csv('../../allConstraintsAnalysis/invConstDFAnalysis.csv')" ] }, { "cell_type": "markdown", "id": "working-stable", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 55, "id": "saved-twelve", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2e8a241c831b4968ae22d06c22c6e85e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/122 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for symmetric constraint checks\")" ] }, { "cell_type": "markdown", "id": "ongoing-merit", "metadata": {}, "source": [ "# Combine Plots for constraints" ] }, { "cell_type": "code", "execution_count": 1, "id": "fundamental-contrary", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "typeConstDF = pd.read_csv(\"../../allConstraintsAnalysis/typeConstDFAnalysis.csv\")\n", "typeConstDF = typeConstDF.set_index(typeConstDF.iloc[:, 0])\n", "\n", "valTypeConstDF = pd.read_csv(\"../../allConstraintsAnalysis/valueTypeConstDFAnalysis.csv\")\n", "valTypeConstDF = valTypeConstDF.set_index(valTypeConstDF.iloc[:, 0])\n", "\n", "codepConstDF1 = pd.read_csv(\"../../allConstraintsAnalysis/codepConstDFAnalysis.csv\")\n", "codepConstDF1 = codepConstDF1.set_index(codepConstDF1.iloc[:, 0])\n", "\n", "symmConstDF = pd.read_csv(\"../../allConstraintsAnalysis/symmConstDFAnalysis.csv\")\n", "symmConstDF = symmConstDF.set_index(symmConstDF.iloc[:, 0])\n", "\n", "invConstDF = pd.read_csv(\"../../allConstraintsAnalysis/invConstDFAnalysis.csv\")\n", "invConstDF = invConstDF.set_index(invConstDF.iloc[:, 0])" ] }, { "cell_type": "code", "execution_count": 14, "id": "bigger-standing", "metadata": {}, "outputs": [], "source": [ "typeConstDF1 = typeConstDF.add_suffix(\"_type_const\")['violation_ratio_type_const'].rename().sort_values()" ] }, { "cell_type": "code", "execution_count": 15, "id": "human-artist", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF1 = valTypeConstDF.add_suffix(\"_valuetype_const\")['violation_ratio_valuetype_const'].sort_values()" ] }, { "cell_type": "code", "execution_count": 16, "id": "gothic-decision", "metadata": {}, "outputs": [], "source": [ "codepConstDF1_1 = codepConstDF1.add_suffix(\"_codep_const\")['violation_ratio_codep_const'].sort_values()" ] }, { "cell_type": "code", "execution_count": 17, "id": "alleged-immunology", "metadata": {}, "outputs": [], "source": [ "symmConstDF1_1 = symmConstDF.add_suffix(\"_symm_const\")['violation_ratio_symm_const'].sort_values()" ] }, { "cell_type": "code", "execution_count": 18, "id": "vulnerable-estonia", "metadata": {}, "outputs": [], "source": [ "invConstDF1_1 = invConstDF.add_suffix(\"_inv_const\")['violation_ratio_inv_const'].sort_values()" ] }, { "cell_type": "code", "execution_count": 7, "id": "individual-pocket", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Unnamed: 0\n", "P1605 0.940000\n", "P3448 0.879346\n", "P925 0.833333\n", "P926 0.833333\n", "P1029 0.810908\n", "Name: violation_ratio_inv_const, dtype: float64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF1_1.head()" ] }, { "cell_type": "code", "execution_count": 19, "id": "facial-announcement", "metadata": {}, "outputs": [], "source": [ "typeConstDF1.index.names = ['property']\n", "typeConstDF1 = typeConstDF1.reset_index().reset_index()\n", "typeConstDF1['index'] = typeConstDF1['index'].apply(lambda p: (p+1) * 100/len(typeConstDF1))\n", "typeConstDF1 = typeConstDF1.set_index('index')[0]" ] }, { "cell_type": "code", "execution_count": 21, "id": "impressive-fantasy", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF1.index.names = ['property']\n", "valTypeConstDF1 = valTypeConstDF1.reset_index().reset_index()\n", "valTypeConstDF1['index'] = valTypeConstDF1['index'].apply(lambda p: (p+1) * 100/len(valTypeConstDF1))\n", "valTypeConstDF1 = valTypeConstDF1.set_index('index')['violation_ratio_valuetype_const']" ] }, { "cell_type": "code", "execution_count": 22, "id": "preceding-yahoo", "metadata": {}, "outputs": [], "source": [ "codepConstDF1_1.index.names = ['property']\n", "codepConstDF1_1 = codepConstDF1_1.reset_index().reset_index()\n", "codepConstDF1_1['index'] = codepConstDF1_1['index'].apply(lambda p: (p+1) * 100/len(codepConstDF1_1))\n", "codepConstDF1_1 = codepConstDF1_1.set_index('index')['violation_ratio_codep_const']" ] }, { "cell_type": "code", "execution_count": 23, "id": "attached-civilization", "metadata": {}, "outputs": [], "source": [ "symmConstDF1_1.index.names = ['property']\n", "symmConstDF1_1 = symmConstDF1_1.reset_index().reset_index()\n", "symmConstDF1_1['index'] = symmConstDF1_1['index'].apply(lambda p: (p+1) * 100/len(symmConstDF1_1))\n", "symmConstDF1_1 = symmConstDF1_1.set_index('index')['violation_ratio_symm_const']" ] }, { "cell_type": "code", "execution_count": 24, "id": "supposed-fitting", "metadata": {}, "outputs": [], "source": [ "invConstDF1_1.index.names = ['property']\n", "invConstDF1_1 = invConstDF1_1.reset_index().reset_index()\n", "invConstDF1_1['index'] = invConstDF1_1['index'].apply(lambda p: (p+1) * 100/len(invConstDF1_1))\n", "invConstDF1_1 = invConstDF1_1.set_index('index')['violation_ratio_inv_const']" ] }, { "cell_type": "code", "execution_count": 29, "id": "lonely-emphasis", "metadata": {}, "outputs": [], "source": [ "typeConstDF2 = [np.percentile(typeConstDF1,i)*100 for i in range(1, 101)]\n", "valTypeConstDF2 = [np.percentile(valTypeConstDF1,i)*100 for i in range(1, 101)]\n", "codepConstDF1_2 = [np.percentile(codepConstDF1_1,i)*100 for i in range(1, 101)]\n", "symmConstDF1_2 = [np.percentile(symmConstDF1_1,i)*100 for i in range(1, 101)]\n", "invConstDF1_2 = [np.percentile(invConstDF1_1,i)*100 for i in range(1, 101)]" ] }, { "cell_type": "code", "execution_count": 30, "id": "stopped-guarantee", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF = pd.DataFrame({'index':list(range(1, 101)), 'type': typeConstDF2, 'value type': valTypeConstDF2, 'irs': codepConstDF1_2, 'symmetric': symmConstDF1_2, 'inverse': invConstDF1_2})" ] }, { "cell_type": "code", "execution_count": 31, "id": "synthetic-modification", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF = constAnalysisDF.melt('index', var_name='constraint', value_name='VR')" ] }, { "cell_type": "code", "execution_count": 32, "id": "collective-adolescent", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexconstraintVR
01type0.0
12type0.0
23type0.0
34type0.0
45type0.0
\n", "
" ], "text/plain": [ " index constraint VR\n", "0 1 type 0.0\n", "1 2 type 0.0\n", "2 3 type 0.0\n", "3 4 type 0.0\n", "4 5 type 0.0" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "constAnalysisDF.head()" ] }, { "cell_type": "code", "execution_count": 16, "id": "angry-saying", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Text(0.5, 0, 'Proportion of properties (in %)'),\n", " Text(0, 0.5, 'Violation Ratio (in %)')]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(10, 6))\n", "ax = sns.lineplot(x='index', y='VR', hue='constraint', data=constAnalysisDF)\n", "ax.set(xlabel=\"Proportion of properties (in %)\", ylabel = \"Violation Ratio (in %)\")" ] }, { "cell_type": "markdown", "id": "controversial-invitation", "metadata": {}, "source": [ "## Option 2" ] }, { "cell_type": "code", "execution_count": 113, "id": "alternate-ceremony", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "typeConstDF = pd.read_csv(\"../../allConstraintsAnalysis/typeConstDFAnalysis.csv\")\n", "typeConstDF = typeConstDF.set_index(typeConstDF.iloc[:, 0])\n", "\n", "valTypeConstDF = pd.read_csv(\"../../allConstraintsAnalysis/valueTypeConstDFAnalysis.csv\")\n", "valTypeConstDF = valTypeConstDF.set_index(valTypeConstDF.iloc[:, 0])\n", "\n", "codepConstDF1 = pd.read_csv(\"../../allConstraintsAnalysis/codepConstDFAnalysis.csv\")\n", "codepConstDF1 = codepConstDF1.set_index(codepConstDF1.iloc[:, 0])\n", "\n", "symmConstDF = pd.read_csv(\"../../allConstraintsAnalysis/symmConstDFAnalysis.csv\")\n", "symmConstDF = symmConstDF.set_index(symmConstDF.iloc[:, 0])\n", "\n", "invConstDF = pd.read_csv(\"../../allConstraintsAnalysis/invConstDFAnalysis.csv\")\n", "invConstDF = invConstDF.set_index(invConstDF.iloc[:, 0])" ] }, { "cell_type": "code", "execution_count": 114, "id": "complimentary-prague", "metadata": {}, "outputs": [], "source": [ "typeConstDF1 = typeConstDF.add_suffix(\"_type_const\")[['violation_ratio_type_const']].sort_values(by=['violation_ratio_type_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 115, "id": "executed-suffering", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF1 = valTypeConstDF.add_suffix(\"_valuetype_const\")[['violation_ratio_valuetype_const']].sort_values(by=['violation_ratio_valuetype_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 116, "id": "green-still", "metadata": {}, "outputs": [], "source": [ "codepConstDF1_1 = codepConstDF1.add_suffix(\"_codep_const\")[['violation_ratio_codep_const']].sort_values(by=['violation_ratio_codep_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 117, "id": "adopted-andrew", "metadata": {}, "outputs": [], "source": [ "symmConstDF1_1 = symmConstDF.add_suffix(\"_symm_const\")[['violation_ratio_symm_const']].sort_values(by=['violation_ratio_symm_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 118, "id": "attended-unknown", "metadata": {}, "outputs": [], "source": [ "invConstDF1_1 = invConstDF.add_suffix(\"_inv_const\")[['violation_ratio_inv_const']].sort_values(by=['violation_ratio_inv_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 119, "id": "conventional-blues", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
violation_ratio_inv_const
Unnamed: 0
P16050.940000
P34480.879346
P9250.833333
P9260.833333
P10290.810908
\n", "
" ], "text/plain": [ " violation_ratio_inv_const\n", "Unnamed: 0 \n", "P1605 0.940000\n", "P3448 0.879346\n", "P925 0.833333\n", "P926 0.833333\n", "P1029 0.810908" ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF1_1.head()" ] }, { "cell_type": "code", "execution_count": 147, "id": "viral-restoration", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1456, 897, 527, 38, 110)" ] }, "execution_count": 147, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(typeConstDF1), len(valTypeConstDF1), len(codepConstDF1_1), len(symmConstDF), len(invConstDF1_1)" ] }, { "cell_type": "code", "execution_count": 120, "id": "gross-measurement", "metadata": {}, "outputs": [], "source": [ "typeConstDF1.index.names = ['property']\n", "typeConstDF1 = typeConstDF1.reset_index()[['violation_ratio_type_const']]" ] }, { "cell_type": "code", "execution_count": 121, "id": "fatty-revolution", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF1.index.names = ['property']\n", "valTypeConstDF1 = valTypeConstDF1.reset_index()[['violation_ratio_valuetype_const']]" ] }, { "cell_type": "code", "execution_count": 122, "id": "short-dallas", "metadata": {}, "outputs": [], "source": [ "codepConstDF1_1.index.names = ['property']\n", "codepConstDF1_1 = codepConstDF1_1.reset_index()[['violation_ratio_codep_const']]" ] }, { "cell_type": "code", "execution_count": 123, "id": "sublime-helen", "metadata": {}, "outputs": [], "source": [ "symmConstDF1_1.index.names = ['property']\n", "symmConstDF1_1 = symmConstDF1_1.reset_index()[['violation_ratio_symm_const']]" ] }, { "cell_type": "code", "execution_count": 124, "id": "dying-interview", "metadata": {}, "outputs": [], "source": [ "invConstDF1_1.index.names = ['property']\n", "invConstDF1_1 = invConstDF1_1.reset_index()[['violation_ratio_inv_const']]" ] }, { "cell_type": "code", "execution_count": 125, "id": "warming-glucose", "metadata": {}, "outputs": [], "source": [ "typeConstDF2 = [np.percentile(typeConstDF1,i)*100 for i in range(1, 101)]\n", "valTypeConstDF2 = [np.percentile(valTypeConstDF1,i)*100 for i in range(1, 101)]\n", "codepConstDF1_2 = [np.percentile(codepConstDF1_1,i)*100 for i in range(1, 101)]\n", "symmConstDF1_2 = [np.percentile(symmConstDF1_1,i)*100 for i in range(1, 101)]\n", "invConstDF1_2 = [np.percentile(invConstDF1_1,i)*100 for i in range(1, 101)]" ] }, { "cell_type": "code", "execution_count": 149, "id": "intended-bulgarian", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF = pd.DataFrame({'index':list(range(100, 0, -1)), 'type': typeConstDF2, 'value type': valTypeConstDF2, 'irs': codepConstDF1_2, 'symmetric': symmConstDF1_2, 'inverse': invConstDF1_2})" ] }, { "cell_type": "code", "execution_count": 150, "id": "alive-guitar", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indextypevalue typeirssymmetricinverse
01000.00.00.00.00.000000
1990.00.00.00.00.000000
2980.00.00.00.00.000000
3970.00.00.00.00.001924
4960.00.00.00.00.010648
\n", "
" ], "text/plain": [ " index type value type irs symmetric inverse\n", "0 100 0.0 0.0 0.0 0.0 0.000000\n", "1 99 0.0 0.0 0.0 0.0 0.000000\n", "2 98 0.0 0.0 0.0 0.0 0.000000\n", "3 97 0.0 0.0 0.0 0.0 0.001924\n", "4 96 0.0 0.0 0.0 0.0 0.010648" ] }, "execution_count": 150, "metadata": {}, "output_type": "execute_result" } ], "source": [ "constAnalysisDF.head()" ] }, { "cell_type": "code", "execution_count": 151, "id": "tired-breed", "metadata": {}, "outputs": [], "source": [ "# constAnalysisDF = typeConstDF1.join(valTypeConstDF1).join(codepConstDF1_1).join(symmConstDF1_1).join(invConstDF1_1).rename(columns={'violation_ratio_type_const':'type', 'violation_ratio_valuetype_const': 'value type', 'violation_ratio_codep_const': 'irs', 'violation_ratio_symm_const': 'symmetric', 'violation_ratio_inv_const': 'inverse'}).reset_index()" ] }, { "cell_type": "code", "execution_count": 152, "id": "fuzzy-oakland", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF = constAnalysisDF.melt('index', var_name='constraint', value_name='VR')" ] }, { "cell_type": "code", "execution_count": 153, "id": "common-april", "metadata": {}, "outputs": [], "source": [ "def getSizes(row):\n", " map1 = {'type': 1456, 'value type': 897, 'irs': 527, 'symmetric': 38, 'inverse':110}\n", " return map1[row['constraint']]\n", "constAnalysisDF['len'] = constAnalysisDF.apply(getSizes, axis=1)" ] }, { "cell_type": "code", "execution_count": 154, "id": "interstate-sunday", "metadata": {}, "outputs": [], "source": [ "# constAnalysisDF['VR'] *= 100" ] }, { "cell_type": "code", "execution_count": 155, "id": "continent-representation", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexconstraintVRlen
0100type0.01456
199type0.01456
298type0.01456
397type0.01456
496type0.01456
\n", "
" ], "text/plain": [ " index constraint VR len\n", "0 100 type 0.0 1456\n", "1 99 type 0.0 1456\n", "2 98 type 0.0 1456\n", "3 97 type 0.0 1456\n", "4 96 type 0.0 1456" ] }, "execution_count": 155, "metadata": {}, "output_type": "execute_result" } ], "source": [ "constAnalysisDF.head()" ] }, { "cell_type": "code", "execution_count": 162, "id": "economic-henry", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "[Text(0.5, 0, 'Properties'),\n", " Text(0, 0.5, 'Violation Ratio (in %)'),\n", " [Text(-20.0, 0, ''),\n", " Text(0.0, 0, ''),\n", " Text(20.0, 0, ''),\n", " Text(40.0, 0, ''),\n", " Text(60.0, 0, ''),\n", " Text(80.0, 0, ''),\n", " Text(100.0, 0, ''),\n", " Text(120.0, 0, '')]]" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(10, 6))\n", "ax = sns.scatterplot(x='index',y='VR',hue='constraint',data=constAnalysisDF)\n", "ax.set(xlabel=\"Properties\", ylabel = \"Violation Ratio (in %)\",xticklabels=[])\n", "# h,l = ax.get_legend_handles_labels()\n", "# plt.legend(h[0:3],l[0:3],bbox_to_anchor=(1.05, 1), loc=0, borderaxespad=0., fontsize=13)\n", "# plt.show(ax)" ] }, { "cell_type": "markdown", "id": "compliant-anger", "metadata": {}, "source": [ "## Option 3" ] }, { "cell_type": "code", "execution_count": 3, "id": "sought-charger", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "typeConstDF = pd.read_csv(\"../../allConstraintsAnalysis/typeConstDFAnalysis.csv\")\n", "typeConstDF = typeConstDF.set_index(typeConstDF.iloc[:, 0])\n", "\n", "valTypeConstDF = pd.read_csv(\"../../allConstraintsAnalysis/valueTypeConstDFAnalysis.csv\")\n", "valTypeConstDF = valTypeConstDF.set_index(valTypeConstDF.iloc[:, 0])\n", "\n", "codepConstDF1 = pd.read_csv(\"../../allConstraintsAnalysis/codepConstDFAnalysis.csv\")\n", "codepConstDF1 = codepConstDF1.set_index(codepConstDF1.iloc[:, 0])\n", "\n", "symmConstDF = pd.read_csv(\"../../allConstraintsAnalysis/symmConstDFAnalysis.csv\")\n", "symmConstDF = symmConstDF.set_index(symmConstDF.iloc[:, 0])\n", "\n", "invConstDF = pd.read_csv(\"../../allConstraintsAnalysis/invConstDFAnalysis.csv\")\n", "invConstDF = invConstDF.set_index(invConstDF.iloc[:, 0])" ] }, { "cell_type": "code", "execution_count": 4, "id": "thick-inflation", "metadata": {}, "outputs": [], "source": [ "typeConstDF1 = typeConstDF.add_suffix(\"_type_const\")[['violation_ratio_type_const']].sort_values(by=['violation_ratio_type_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 5, "id": "smart-activation", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF1 = valTypeConstDF.add_suffix(\"_valuetype_const\")[['violation_ratio_valuetype_const']].sort_values(by=['violation_ratio_valuetype_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 6, "id": "whole-missile", "metadata": {}, "outputs": [], "source": [ "codepConstDF1_1 = codepConstDF1.add_suffix(\"_codep_const\")[['violation_ratio_codep_const']].sort_values(by=['violation_ratio_codep_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 7, "id": "funky-calibration", "metadata": {}, "outputs": [], "source": [ "symmConstDF1_1 = symmConstDF.add_suffix(\"_symm_const\")[['violation_ratio_symm_const']].sort_values(by=['violation_ratio_symm_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 8, "id": "removable-transmission", "metadata": {}, "outputs": [], "source": [ "invConstDF1_1 = invConstDF.add_suffix(\"_inv_const\")[['violation_ratio_inv_const']].sort_values(by=['violation_ratio_inv_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "european-sentence", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
violation_ratio_inv_const
Unnamed: 0
P16050.940000
P34480.879346
P9250.833333
P9260.833333
P10290.810908
\n", "
" ], "text/plain": [ " violation_ratio_inv_const\n", "Unnamed: 0 \n", "P1605 0.940000\n", "P3448 0.879346\n", "P925 0.833333\n", "P926 0.833333\n", "P1029 0.810908" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF1_1.head()" ] }, { "cell_type": "code", "execution_count": 10, "id": "lined-discrimination", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1456, 897, 527, 38, 110)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(typeConstDF1), len(valTypeConstDF1), len(codepConstDF1_1), len(symmConstDF), len(invConstDF1_1)" ] }, { "cell_type": "code", "execution_count": 11, "id": "imported-cooling", "metadata": {}, "outputs": [], "source": [ "typeConstDF1.index.names = ['property']\n", "typeConstDF1 = typeConstDF1.reset_index()[['violation_ratio_type_const']]" ] }, { "cell_type": "code", "execution_count": 12, "id": "hydraulic-coating", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF1.index.names = ['property']\n", "valTypeConstDF1 = valTypeConstDF1.reset_index()[['violation_ratio_valuetype_const']]" ] }, { "cell_type": "code", "execution_count": 13, "id": "brief-amsterdam", "metadata": {}, "outputs": [], "source": [ "codepConstDF1_1.index.names = ['property']\n", "codepConstDF1_1 = codepConstDF1_1.reset_index()[['violation_ratio_codep_const']]" ] }, { "cell_type": "code", "execution_count": 14, "id": "massive-championship", "metadata": {}, "outputs": [], "source": [ "symmConstDF1_1.index.names = ['property']\n", "symmConstDF1_1 = symmConstDF1_1.reset_index()[['violation_ratio_symm_const']]" ] }, { "cell_type": "code", "execution_count": 15, "id": "psychological-homeless", "metadata": {}, "outputs": [], "source": [ "invConstDF1_1.index.names = ['property']\n", "invConstDF1_1 = invConstDF1_1.reset_index()[['violation_ratio_inv_const']]" ] }, { "cell_type": "code", "execution_count": 47, "id": "alpine-sampling", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "typeConstDF2 = pd.DataFrame({'index': list(np.arange(100, 1, -0.05)), 'type': [np.percentile(typeConstDF1,i)*100 for i in np.arange(1, 100,0.05)]}).set_index('index')\n", "valTypeConstDF2 = pd.DataFrame({'index': list(np.arange(100, 1, -0.1)), 'value type': [np.percentile(valTypeConstDF1,i)*100 for i in np.arange(1, 100, 0.1)]}).set_index('index')\n", "codepConstDF1_2 = pd.DataFrame({'index': list(np.arange(100, 1, -0.5)), 'irs': [np.percentile(codepConstDF1_1,i)*100 for i in np.arange(1, 100, 0.5)]}).set_index('index')\n", "symmConstDF1_2 = pd.DataFrame({'index': list(np.arange(100, 1, -5)), 'symmetric': [np.percentile(symmConstDF1_1,i)*100 for i in np.arange(1, 100,5)]}).set_index('index')\n", "invConstDF1_2 = pd.DataFrame({'index': list(np.arange(100, 1, -1)), 'inverse': [np.percentile(invConstDF1_1,i)*100 for i in np.arange(1, 100,1)]}).set_index('index')" ] }, { "cell_type": "code", "execution_count": 48, "id": "elementary-attack", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF = typeConstDF2.join(valTypeConstDF2, how='outer').join(codepConstDF1_2, how='outer').join(symmConstDF1_2, how='outer').join(invConstDF1_2, how='outer')\n", "constAnalysisDF = constAnalysisDF.reset_index()" ] }, { "cell_type": "code", "execution_count": 49, "id": "focused-georgia", "metadata": {}, "outputs": [], "source": [ "# constAnalysisDF = typeConstDF1.join(valTypeConstDF1).join(codepConstDF1_1).join(symmConstDF1_1).join(invConstDF1_1).rename(columns={'violation_ratio_type_const':'type', 'violation_ratio_valuetype_const': 'value type', 'violation_ratio_codep_const': 'irs', 'violation_ratio_symm_const': 'symmetric', 'violation_ratio_inv_const': 'inverse'}).reset_index()" ] }, { "cell_type": "code", "execution_count": 50, "id": "military-democrat", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF = constAnalysisDF.melt('index', var_name='constraint', value_name='VR')" ] }, { "cell_type": "code", "execution_count": 51, "id": "fuzzy-somalia", "metadata": {}, "outputs": [], "source": [ "# def getSizes(row):\n", "# map1 = {'type': 1456, 'value type': 897, 'irs': 527, 'symmetric': 38, 'inverse':110}\n", "# return map1[row['constraint']]\n", "# constAnalysisDF['len'] = constAnalysisDF.apply(getSizes, axis=1)" ] }, { "cell_type": "code", "execution_count": 52, "id": "alleged-portugal", "metadata": {}, "outputs": [], "source": [ "# constAnalysisDF['VR'] *= 100" ] }, { "cell_type": "code", "execution_count": 53, "id": "australian-offering", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexconstraintVR
01.05type100.0
11.10type100.0
21.15type100.0
31.20type100.0
41.25type100.0
\n", "
" ], "text/plain": [ " index constraint VR\n", "0 1.05 type 100.0\n", "1 1.10 type 100.0\n", "2 1.15 type 100.0\n", "3 1.20 type 100.0\n", "4 1.25 type 100.0" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "constAnalysisDF.head()" ] }, { "cell_type": "code", "execution_count": 54, "id": "placed-speech", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "[Text(0.5, 0, 'Properties'),\n", " Text(0, 0.5, 'Violation Ratio (in %)'),\n", " [Text(-20.0, 0, ''),\n", " Text(0.0, 0, ''),\n", " Text(20.0, 0, ''),\n", " Text(40.0, 0, ''),\n", " Text(60.0, 0, ''),\n", " Text(80.0, 0, ''),\n", " Text(100.0, 0, ''),\n", " Text(120.0, 0, '')]]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(10, 6))\n", "ax = sns.scatterplot(x='index',y='VR',hue='constraint',data=constAnalysisDF)\n", "ax.set(xlabel=\"Properties\", ylabel = \"Violation Ratio (in %)\",xticklabels=[])\n", "# h,l = ax.get_legend_handles_labels()\n", "# plt.legend(h[0:3],l[0:3],bbox_to_anchor=(1.05, 1), loc=0, borderaxespad=0., fontsize=13)\n", "# plt.show(ax)" ] }, { "cell_type": "markdown", "id": "adjustable-geneva", "metadata": {}, "source": [ "## Option 4" ] }, { "cell_type": "code", "execution_count": 73, "id": "furnished-northwest", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "typeConstDF = pd.read_csv(\"../../allConstraintsAnalysis/typeConstDFAnalysis.csv\")\n", "typeConstDF = typeConstDF.set_index(typeConstDF.iloc[:, 0])\n", "\n", "valTypeConstDF = pd.read_csv(\"../../allConstraintsAnalysis/valueTypeConstDFAnalysis.csv\")\n", "valTypeConstDF = valTypeConstDF.set_index(valTypeConstDF.iloc[:, 0])\n", "\n", "codepConstDF1 = pd.read_csv(\"../../allConstraintsAnalysis/codepConstDFAnalysis.csv\")\n", "codepConstDF1 = codepConstDF1.set_index(codepConstDF1.iloc[:, 0])\n", "\n", "symmConstDF = pd.read_csv(\"../../allConstraintsAnalysis/symmConstDFAnalysis.csv\")\n", "symmConstDF = symmConstDF.set_index(symmConstDF.iloc[:, 0])\n", "\n", "invConstDF = pd.read_csv(\"../../allConstraintsAnalysis/invConstDFAnalysis.csv\")\n", "invConstDF = invConstDF.set_index(invConstDF.iloc[:, 0])" ] }, { "cell_type": "code", "execution_count": 77, "id": "proof-screen", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0correctincorrectpathsviolation_ratiototal
Unnamed: 0
P742P7424890389['../../allConstraintsAnalysis/typeConstraint_...0.00181748992
P2663P26635547['../../allConstraintsAnalysis/typeConstraint_...0.012478561
P5105P5105191995['../../allConstraintsAnalysis/typeConstraint_...0.0471702014
P6938P693832['../../allConstraintsAnalysis/typeConstraint_...0.4000005
P3179P3179505213['../../allConstraintsAnalysis/typeConstraint_...0.0025675065
.....................
P1072P10725263123['../../allConstraintsAnalysis/typeConstraint_...0.0228375386
P1073P1073226322['../../allConstraintsAnalysis/typeConstraint_...0.0096282285
P16P1649781238['../../allConstraintsAnalysis/typeConstraint_...0.00475850019
P1465P14651507110['../../allConstraintsAnalysis/typeConstraint_...0.00066315081
P1470P1470240['../../allConstraintsAnalysis/typeConstraint_...0.00000024
\n", "

1456 rows × 6 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 correct incorrect \\\n", "Unnamed: 0 \n", "P742 P742 48903 89 \n", "P2663 P2663 554 7 \n", "P5105 P5105 1919 95 \n", "P6938 P6938 3 2 \n", "P3179 P3179 5052 13 \n", "... ... ... ... \n", "P1072 P1072 5263 123 \n", "P1073 P1073 2263 22 \n", "P16 P16 49781 238 \n", "P1465 P1465 15071 10 \n", "P1470 P1470 24 0 \n", "\n", " paths \\\n", "Unnamed: 0 \n", "P742 ['../../allConstraintsAnalysis/typeConstraint_... \n", "P2663 ['../../allConstraintsAnalysis/typeConstraint_... \n", "P5105 ['../../allConstraintsAnalysis/typeConstraint_... \n", "P6938 ['../../allConstraintsAnalysis/typeConstraint_... \n", "P3179 ['../../allConstraintsAnalysis/typeConstraint_... \n", "... ... \n", "P1072 ['../../allConstraintsAnalysis/typeConstraint_... \n", "P1073 ['../../allConstraintsAnalysis/typeConstraint_... \n", "P16 ['../../allConstraintsAnalysis/typeConstraint_... \n", "P1465 ['../../allConstraintsAnalysis/typeConstraint_... \n", "P1470 ['../../allConstraintsAnalysis/typeConstraint_... \n", "\n", " violation_ratio total \n", "Unnamed: 0 \n", "P742 0.001817 48992 \n", "P2663 0.012478 561 \n", "P5105 0.047170 2014 \n", "P6938 0.400000 5 \n", "P3179 0.002567 5065 \n", "... ... ... \n", "P1072 0.022837 5386 \n", "P1073 0.009628 2285 \n", "P16 0.004758 50019 \n", "P1465 0.000663 15081 \n", "P1470 0.000000 24 \n", "\n", "[1456 rows x 6 columns]" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF" ] }, { "cell_type": "code", "execution_count": 78, "id": "naked-depth", "metadata": {}, "outputs": [], "source": [ "typeConstDF1 = typeConstDF.add_suffix(\"_type_const\")[['violation_ratio_type_const','incorrect_type_const']].sort_values(by=['violation_ratio_type_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 80, "id": "thousand-embassy", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF1 = valTypeConstDF.add_suffix(\"_valuetype_const\")[['violation_ratio_valuetype_const','incorrect_valuetype_const']].sort_values(by=['violation_ratio_valuetype_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 81, "id": "anticipated-calgary", "metadata": {}, "outputs": [], "source": [ "codepConstDF1_1 = codepConstDF1.add_suffix(\"_codep_const\")[['violation_ratio_codep_const','incorrect_codep_const']].sort_values(by=['violation_ratio_codep_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 83, "id": "vanilla-trouble", "metadata": {}, "outputs": [], "source": [ "symmConstDF1_1 = symmConstDF.add_suffix(\"_symm_const\")[['violation_ratio_symm_const','incorrect_symm_const']].sort_values(by=['violation_ratio_symm_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 84, "id": "inappropriate-permit", "metadata": {}, "outputs": [], "source": [ "invConstDF1_1 = invConstDF.add_suffix(\"_inv_const\")[['violation_ratio_inv_const','incorrect_inv_const']].sort_values(by=['violation_ratio_inv_const'],ascending=False)" ] }, { "cell_type": "code", "execution_count": 85, "id": "technical-colors", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
violation_ratio_inv_constincorrect_inv_const
Unnamed: 0
P16050.940000188
P34480.8793464249
P9250.8333335
P9260.8333335
P10290.8109082037
\n", "
" ], "text/plain": [ " violation_ratio_inv_const incorrect_inv_const\n", "Unnamed: 0 \n", "P1605 0.940000 188\n", "P3448 0.879346 4249\n", "P925 0.833333 5\n", "P926 0.833333 5\n", "P1029 0.810908 2037" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF1_1.head()" ] }, { "cell_type": "code", "execution_count": 86, "id": "utility-criticism", "metadata": {}, "outputs": [], "source": [ "typeConstDF1.index.names = ['property']\n", "typeConstDF1 = typeConstDF1.reset_index()[['violation_ratio_type_const','incorrect_type_const']]" ] }, { "cell_type": "code", "execution_count": 87, "id": "according-protection", "metadata": {}, "outputs": [], "source": [ "valTypeConstDF1.index.names = ['property']\n", "valTypeConstDF1 = valTypeConstDF1.reset_index()[['violation_ratio_valuetype_const','incorrect_valuetype_const']]" ] }, { "cell_type": "code", "execution_count": 88, "id": "choice-relationship", "metadata": {}, "outputs": [], "source": [ "codepConstDF1_1.index.names = ['property']\n", "codepConstDF1_1 = codepConstDF1_1.reset_index()[['violation_ratio_codep_const','incorrect_codep_const']]" ] }, { "cell_type": "code", "execution_count": 89, "id": "atomic-foster", "metadata": {}, "outputs": [], "source": [ "symmConstDF1_1.index.names = ['property']\n", "symmConstDF1_1 = symmConstDF1_1.reset_index()[['violation_ratio_symm_const','incorrect_symm_const']]" ] }, { "cell_type": "code", "execution_count": 90, "id": "compatible-silly", "metadata": {}, "outputs": [], "source": [ "invConstDF1_1.index.names = ['property']\n", "invConstDF1_1 = invConstDF1_1.reset_index()[['violation_ratio_inv_const','incorrect_inv_const']]" ] }, { "cell_type": "code", "execution_count": 102, "id": "portuguese-concentrate", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF = typeConstDF1.join(valTypeConstDF1).join(codepConstDF1_1).join(symmConstDF1_1).join(invConstDF1_1).rename(columns={'violation_ratio_type_const':'type', 'violation_ratio_valuetype_const': 'value type', 'violation_ratio_codep_const': 'irs', 'violation_ratio_symm_const': 'symmetric', 'violation_ratio_inv_const': 'inverse'}).reset_index()" ] }, { "cell_type": "code", "execution_count": 103, "id": "twenty-gross", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indextypeincorrect_type_constvalue typeincorrect_valuetype_constirsincorrect_codep_constsymmetricincorrect_symm_constinverseincorrect_inv_const
001.041.01369.01.01.01.0000002.00.940000188.0
111.041.05.01.08.00.92452849.00.8793464249.0
221.011.013.01.01.00.628133777.00.8333335.0
331.031.03.01.01.00.256228144.00.8333335.0
441.0641.012.01.042211.00.1612201919.00.8109082037.0
\n", "
" ], "text/plain": [ " index type incorrect_type_const value type incorrect_valuetype_const \\\n", "0 0 1.0 4 1.0 1369.0 \n", "1 1 1.0 4 1.0 5.0 \n", "2 2 1.0 1 1.0 13.0 \n", "3 3 1.0 3 1.0 3.0 \n", "4 4 1.0 64 1.0 12.0 \n", "\n", " irs incorrect_codep_const symmetric incorrect_symm_const inverse \\\n", "0 1.0 1.0 1.000000 2.0 0.940000 \n", "1 1.0 8.0 0.924528 49.0 0.879346 \n", "2 1.0 1.0 0.628133 777.0 0.833333 \n", "3 1.0 1.0 0.256228 144.0 0.833333 \n", "4 1.0 42211.0 0.161220 1919.0 0.810908 \n", "\n", " incorrect_inv_const \n", "0 188.0 \n", "1 4249.0 \n", "2 5.0 \n", "3 5.0 \n", "4 2037.0 " ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "constAnalysisDF" ] }, { "cell_type": "code", "execution_count": 105, "id": "nervous-herald", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF = constAnalysisDF.melt('index', var_name='constraint', value_name='VR')" ] }, { "cell_type": "code", "execution_count": 98, "id": "standard-conditioning", "metadata": {}, "outputs": [], "source": [ "constAnalysisDF['VR'] *= 100" ] }, { "cell_type": "code", "execution_count": 106, "id": "advisory-nutrition", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexconstraintVR
00type1.0
11type1.0
22type1.0
33type1.0
44type1.0
............
145551451incorrect_inv_constNaN
145561452incorrect_inv_constNaN
145571453incorrect_inv_constNaN
145581454incorrect_inv_constNaN
145591455incorrect_inv_constNaN
\n", "

14560 rows × 3 columns

\n", "
" ], "text/plain": [ " index constraint VR\n", "0 0 type 1.0\n", "1 1 type 1.0\n", "2 2 type 1.0\n", "3 3 type 1.0\n", "4 4 type 1.0\n", "... ... ... ...\n", "14555 1451 incorrect_inv_const NaN\n", "14556 1452 incorrect_inv_const NaN\n", "14557 1453 incorrect_inv_const NaN\n", "14558 1454 incorrect_inv_const NaN\n", "14559 1455 incorrect_inv_const NaN\n", "\n", "[14560 rows x 3 columns]" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "constAnalysisDF" ] }, { "cell_type": "code", "execution_count": 72, "id": "organic-female", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "[Text(0.5, 0, 'Properties'),\n", " Text(0, 0.5, 'Violation Ratio (in %)'),\n", " [Text(-200.0, 0, ''),\n", " Text(0.0, 0, ''),\n", " Text(200.0, 0, ''),\n", " Text(400.0, 0, ''),\n", " Text(600.0, 0, ''),\n", " Text(800.0, 0, ''),\n", " Text(1000.0, 0, ''),\n", " Text(1200.0, 0, ''),\n", " Text(1400.0, 0, ''),\n", " Text(1600.0, 0, '')]]" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(10, 6))\n", "ax = sns.scatterplot(x='index',y='VR',hue='constraint',data=constAnalysisDF)\n", "ax.set(xlabel=\"Properties\", ylabel = \"Violation Ratio (in %)\",xticklabels=[])" ] }, { "cell_type": "code", "execution_count": null, "id": "interior-formula", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "baking-pierce", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "subsequent-reverse", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "unknown-racing", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "lyric-section", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "stuck-criticism", "metadata": {}, "source": [ "# Analysis on properties with constraints" ] }, { "cell_type": "code", "execution_count": 26, "id": "driven-reference", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-03 09:14:12 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " PARAS: ['P2302']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->()\" \\\n", " -o ../../constraintsOP/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 39, "id": "exciting-focus", "metadata": {}, "outputs": [], "source": [ "!kgtk unique -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz --column node1 -o ../../constraintsOP/claims.propList.tsv" ] }, { "cell_type": "code", "execution_count": 42, "id": "flush-romania", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "node1\tlabel\tnode2\r\n", "P10\tcount\t17\r\n", "P1000\tcount\t10\r\n", "P1001\tcount\t26\r\n", "P1002\tcount\t9\r\n", "P1003\tcount\t20\r\n", "P1004\tcount\t33\r\n", "P1005\tcount\t21\r\n", "P1006\tcount\t26\r\n", "P1007\tcount\t19\r\n" ] } ], "source": [ "!head ../../constraintsOP/claims.propList.tsv" ] }, { "cell_type": "code", "execution_count": 43, "id": "chemical-harris", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "props = pd.read_csv(\"../../constraintsOP/claims.constraints_list.tsv\", sep='\\t')" ] }, { "cell_type": "code", "execution_count": 44, "id": "higher-underground", "metadata": {}, "outputs": [], "source": [ "props2 = props.groupby(['node1']).node2.apply(list)" ] }, { "cell_type": "code", "execution_count": 45, "id": "light-appreciation", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8100" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(props2)" ] }, { "cell_type": "code", "execution_count": 48, "id": "yellow-helmet", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2336, 8100)" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt = 0\n", "totalCnt = 0\n", "for prop in props2.index:\n", " totalCnt += 1\n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop +\".tsv\")):\n", " continue\n", " else:\n", " cnt += 1\n", "cnt, totalCnt" ] }, { "cell_type": "code", "execution_count": 50, "id": "detected-skiing", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "node1\n", "P10 [Q21502404, Q21510851, Q21510852, Q52004125, Q...\n", "P1000 [Q21510856, Q21510865, Q53869507]\n", "P1001 [Q21502838, Q21503250, Q21510865, Q25796498]\n", "P1002 [Q21503250, Q21510865]\n", "P1003 [Q19474404, Q21502404, Q21502410, Q21510851, Q...\n", " ... \n", "P1563 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1564 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1565 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1566 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "P1567 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "Name: node2, Length: 500, dtype: object" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "props2.head(500)" ] }, { "cell_type": "code", "execution_count": 32, "id": "processed-perfume", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "props2 = pd.read_csv(\"../../constraintsOP/claims.propList.tsv\", sep='\\t')" ] }, { "cell_type": "code", "execution_count": 33, "id": "increasing-graphics", "metadata": {}, "outputs": [], "source": [ "props2 = props2.groupby(['node1']).node2.apply(list)" ] }, { "cell_type": "code", "execution_count": 34, "id": "posted-ukraine", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8193" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(props2)" ] }, { "cell_type": "code", "execution_count": 35, "id": "fifth-provision", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2415, 8193)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt = 0\n", "totalCnt = 0\n", "for prop in props2.index:\n", " totalCnt += 1\n", " if not(os.path.isfile(\"../../propertiesSplit/claims.\"+ prop +\".tsv\")):\n", " continue\n", " else:\n", " cnt += 1\n", "cnt, totalCnt" ] }, { "cell_type": "code", "execution_count": 50, "id": "married-heating", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "node1\n", "P10 [Q21502404, Q21510851, Q21510852, Q52004125, Q...\n", "P1000 [Q21510856, Q21510865, Q53869507]\n", "P1001 [Q21502838, Q21503250, Q21510865, Q25796498]\n", "P1002 [Q21503250, Q21510865]\n", "P1003 [Q19474404, Q21502404, Q21502410, Q21510851, Q...\n", " ... \n", "P1563 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1564 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1565 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1566 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "P1567 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "Name: node2, Length: 500, dtype: object" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "props2.head(500)" ] }, { "cell_type": "code", "execution_count": null, "id": "magnetic-conditions", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "kgtkEnv", "language": "python", "name": "kgtkenv" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "318px" }, "toc_section_display": true, "toc_window_display": false }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "oldHeight": 122, "position": { "height": "40px", "left": "1170px", "right": "20px", "top": "120px", "width": "250px" }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "varInspector_section_display": "none", "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }