{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "judicial-latvia", "metadata": {}, "outputs": [], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.tsv.gz \\\n", " --match \"(x)-[r{label: property}]->(y{wikidatatype: wikidatatype})\" \\\n", " --return 'r.id as `id`, x as `node1`, property as `label`, y as `node2`, wikidatatype as `node2;wikidatatype`' \\\n", " -o ../../data/claims.edited.tsv \\\n", " --graph-cache ~/temp1.sqlite3.db" ] }, { "cell_type": "code", "execution_count": null, "id": "attended-sphere", "metadata": {}, "outputs": [], "source": [ "!kgtk --debug query -i ../../data/removed_statements.tsv \\\n", " ../../gdrive-kgtk-dump-2020-12-07/metadata.property.datatypes.tsv.gz \\\n", " --match \"removed: (x)-[r{label: property}]->(y), datatypes: (property)-[]->(datatype)\" \\\n", " --return 'r.id as `id`, x as `node1`, property as `label`, y as `node2`, datatype as `node2;wikidatatype`' \\\n", " -o ../../data/removed_statements_w_datatype.tsv --graph-cache ~/temp1.sqlite3.db" ] }, { "cell_type": "code", "execution_count": null, "id": "adjacent-disorder", "metadata": {}, "outputs": [], "source": [ "!kgtk cat -i ../../data/claims.edited.tsv \\\n", " ../../data/removed_statements_w_datatype.tsv \\\n", " -o ../../data/claims.w_removed_statements.tsv" ] }, { "cell_type": "code", "execution_count": 1, "id": "juvenile-ability", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a8ed952b33f9462f86bdcae389daaf11", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1225057250 [00:00(node2), c: (rLabel)-[:P2308]->(parent), d: (node1)-[]->(par), c: (eLabel)-[:P2303]->(eNode)\" \\\n", " --where 'nodeProp.label = rLabel and (par = parent or (rLabel = eLabel and node1 = eNode))' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2, max(parent) as `node1;ancestor`' \\\n", " -o 
../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db ; \\\n", " kgtk --debug ifnotexists -i ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.all.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 4, "id": "abstract-retreat", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-03-12 22:33:26 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT graph_11_c1.\"id\", graph_11_c1.\"node1\", graph_11_c1.\"label\", graph_11_c1.\"node2\"\r\n", " FROM graph_11 AS graph_11_c1, graph_14 AS graph_14_c2\r\n", " WHERE graph_11_c1.\"node1\"=graph_14_c2.\"node1\"\r\n", " AND (graph_14_c2.\"node2\" IN (?, ?, ?))\r\n", " PARAS: ['Q1238720', 'Q3331189', 'Q47461344']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "# P996 type-constraint check. Step 1 (query): statements whose node1 has a derived.isastar edge to one of\n", "# the three listed classes are written to ...P996.correct.tsv. Step 2 (ifnotexists): statements with no\n", "# matching (node1, label) key in that file are written to ...P996.incorrect.tsv.\n", "# The steps are chained with && so step 2 never runs against a missing or stale .correct.tsv\n", "# when the query fails.\n", "!kgtk --debug query -i ../../propertiesSplitWRemoved2/claims.P996.tsv \\\n", " ../../wikidata-20210215/derived.isastar.tsv.gz \\\n", " --match \"m: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)\" \\\n", " --where 'par in [\"Q1238720\",\"Q3331189\",\"Q47461344\"]' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db && \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.P996.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " 
--filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 7, "id": "strange-truck", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "81289 ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv\r\n" ] } ], "source": [ "!wc -l ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv" ] }, { "cell_type": "code", "execution_count": 8, "id": "finnish-hampton", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-03-12 22:49:54 sqlstore]: IMPORT graph directly into table graph_15 from /data/wd-correctness/propertiesSplit/claims.P991.tsv ...\n", "[2021-03-12 22:49:54 query]: SQL Translation:\n", "---------------------------------------------\n", " SELECT graph_15_c1.\"id\", graph_15_c1.\"node1\", graph_15_c1.\"label\", graph_15_c1.\"node2\"\n", " FROM graph_15 AS graph_15_c1, graph_5 AS graph_5_c2\n", " WHERE graph_15_c1.\"node1\"=graph_5_c2.\"node1\"\n", " AND (graph_5_c2.\"node2\" IN (?))\n", " PARAS: ['Q40231']\n", "---------------------------------------------\n", "[2021-03-12 22:49:55 sqlstore]: CREATE INDEX on table graph_15 column node1 ...\n", "[2021-03-12 22:49:55 sqlstore]: ANALYZE INDEX on table graph_15 column node1 ...\n" ] } ], "source": [ "# P991 type-constraint check. Step 1 (query): statements whose node1 has a derived.P31P279star edge to\n", "# Q40231 are written to ...P991.correct.tsv. Step 2 (ifnotexists): statements with no matching\n", "# (node1, label) key in that file are written to ...P991.incorrect.tsv.\n", "# Chained with && so step 2 is skipped when the query fails.\n", "# NOTE(review): the saved debug output names .../propertiesSplit/claims.P991.tsv, not\n", "# propertiesSplitWRemoved2 - it looks stale; re-run to refresh.\n", "!kgtk --debug query -i ../../propertiesSplitWRemoved2/claims.P991.tsv \\\n", " ../../wikidata-20210215/derived.P31P279star.tsv.gz \\\n", " --match \"m: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)\" \\\n", " --where 'par in [\"Q40231\"]' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P991.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db && \\\n", 
" kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.P991.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P991.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P991.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 9, "id": "elegant-reverse", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-03-12 23:54:56 sqlstore]: IMPORT graph directly into table graph_16 from /data/wd-correctness/propertiesSplit/claims.P965.tsv ...\n", "[2021-03-12 23:54:56 query]: SQL Translation:\n", "---------------------------------------------\n", " SELECT graph_16_c1.\"id\", graph_5_c2.\"node1\", graph_16_c1.\"label\", graph_16_c1.\"node2\"\n", " FROM graph_16 AS graph_16_c1, graph_5 AS graph_5_c2\n", " WHERE graph_16_c1.\"node1\"=graph_5_c2.\"node1\"\n", " AND (graph_5_c2.\"node2\" IN (?))\n", " PARAS: ['Q6023295']\n", "---------------------------------------------\n", "[2021-03-12 23:54:56 sqlstore]: CREATE INDEX on table graph_16 column node1 ...\n", "[2021-03-12 23:54:56 sqlstore]: ANALYZE INDEX on table graph_16 column node1 ...\n" ] } ], "source": [ "# P965 type-constraint check. Step 1 (query): statements whose node1 has a derived.P31P279star edge to\n", "# Q6023295 are written to ...P965.correct.tsv. Step 2 (ifnotexists): statements with no matching\n", "# (node1, label) key in that file are written to ...P965.incorrect.tsv.\n", "# Chained with && so step 2 is skipped when the query fails.\n", "# NOTE(review): the saved debug output names .../propertiesSplit/claims.P965.tsv, not\n", "# propertiesSplitWRemoved2 - it looks stale; re-run to refresh.\n", "!kgtk --debug query -i ../../propertiesSplitWRemoved2/claims.P965.tsv \\\n", " ../../wikidata-20210215/derived.P31P279star.tsv.gz \\\n", " --match \"m: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)\" \\\n", " --where 'par in [\"Q6023295\"]' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P965.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db && \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.P965.tsv \\\n", " --filter-on 
../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P965.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P965.incorrect.tsv" ] }, { "cell_type": "markdown", "id": "matched-strength", "metadata": {}, "source": [ "# Generate Queries" ] }, { "cell_type": "markdown", "id": "black-insured", "metadata": {}, "source": [ "## Type Constraint" ] }, { "cell_type": "markdown", "id": "interior-humor", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 2, "id": "clinical-brunei", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.read_csv('../../constraintsOP/typeConstraint/claims.type-constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 3, "id": "assured-cleaners", "metadata": {}, "outputs": [], "source": [ "df1 = df.groupby(['node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 4, "id": "sharing-evolution", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
0P1001P2308[Q102496, Q105985, Q1140371, Q1151067, Q119768...
1P1001P2309[Q30208840]
2P1002P2308[Q630010]
3P1002P2309[Q21514624]
4P1004P2308[Q2221906, Q23413, Q3947, Q41176, Q88291]
\n", "
" ], "text/plain": [ " node1 label node2\n", "0 P1001 P2308 [Q102496, Q105985, Q1140371, Q1151067, Q119768...\n", "1 P1001 P2309 [Q30208840]\n", "2 P1002 P2308 [Q630010]\n", "3 P1002 P2309 [Q21514624]\n", "4 P1004 P2308 [Q2221906, Q23413, Q3947, Q41176, Q88291]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.head()" ] }, { "cell_type": "code", "execution_count": 5, "id": "still-trail", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
69P105P2308[Q16521]
70P105P2309[Q21503252]
71P105P2316[Q21502408]
\n", "
" ], "text/plain": [ " node1 label node2\n", "69 P105 P2308 [Q16521]\n", "70 P105 P2309 [Q21503252]\n", "71 P105 P2316 [Q21502408]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1[df1['node1'] == 'P105']" ] }, { "cell_type": "markdown", "id": "solid-browser", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 6, "id": "bright-impossible", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "392202b0f3984101a4787815b59edf5e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4810 [00:00(node2), \" + parentFile + \": (node1)-[]->(nodex), P279star: (nodex)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/const120_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\")\n", "\n", " cnt += 1\n", " except:\n", " print(\"Something failed for prop:\",prop)\n", "\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 7, "id": 
"electrical-agreement", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1465" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 8, "id": "outside-stupid", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,33):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved2/checkViolations/typeConstraintValidator\"+str(i)+\".sh\")\n", " " ] }, { "cell_type": "markdown", "id": "competitive-canvas", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 3, "id": "casual-perth", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e2aa42859569406cb8ee7fb237917535", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "22777e2fe5a34330b18a7bc2d970b4d0", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/334 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P13032031482610[../../allConstraintsAnalysisWRemoved2/typeCon...0.012685
P30170594928017[../../allConstraintsAnalysisWRemoved2/typeCon...0.038172
P39193393158[../../allConstraintsAnalysisWRemoved2/typeCon...0.044495
P618526912[../../allConstraintsAnalysisWRemoved2/typeCon...0.042705
P3922182440[../../allConstraintsAnalysisWRemoved2/typeCon...0.021459
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P1303 203148 2610 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P301 705949 28017 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P3919 3393 158 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P6185 269 12 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P3922 1824 40 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "\n", " violation_ratio \n", "P1303 0.012685 \n", "P301 0.038172 \n", "P3919 0.044495 \n", "P6185 0.042705 \n", "P3922 0.021459 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF.head()" ] }, { "cell_type": "code", "execution_count": 9, "id": "competitive-peeing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P538010[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P558905[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P800406[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P135402[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P5051081[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P651004[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P173402[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P601403[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P2303044[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P6001016[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P231304[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P580201[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P8738014[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P231205[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
P231104[../../allConstraintsAnalysisWRemoved2/typeCon...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P538 0 10 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P5589 0 5 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P8004 0 6 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P1354 0 2 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P5051 0 81 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P6510 0 4 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P1734 0 2 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P6014 0 3 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P2303 0 44 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P6001 0 16 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P2313 0 4 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P5802 0 1 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P8738 0 14 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P2312 0 5 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P2311 0 4 [../../allConstraintsAnalysisWRemoved2/typeCon... 
\n", "\n", " violation_ratio \n", "P538 1.0 \n", "P5589 1.0 \n", "P8004 1.0 \n", "P1354 1.0 \n", "P5051 1.0 \n", "P6510 1.0 \n", "P1734 1.0 \n", "P6014 1.0 \n", "P2303 1.0 \n", "P6001 1.0 \n", "P2313 1.0 \n", "P5802 1.0 \n", "P8738 1.0 \n", "P2312 1.0 \n", "P2311 1.0 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 10, "id": "backed-corruption", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../../allConstraintsAnalysisWRemoved2/typeConstraint_Final/normal/claims.type-constraints.instanceOf.P4945.correct.tsv',\n", " '../../allConstraintsAnalysisWRemoved2/typeConstraint_Final/normal/claims.type-constraints.instanceOf.P4945.incorrect.tsv']" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(typeConstDF.loc['P4945'].paths)" ] }, { "cell_type": "code", "execution_count": 11, "id": "clinical-lawsuit", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 1465.000000\n", "mean 0.113799\n", "std 0.226303\n", "min 0.000000\n", "25% 0.006623\n", "50% 0.022537\n", "75% 0.085443\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 12, "id": "wanted-domestic", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Type Constraint Violation Ratios')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEICAYAAACwDehOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAZJ0lEQVR4nO3debglVXnv8e9PZqOCQmsQ0HbAgTggaRWTa0SJNwoKGg3RgCAPkZhonsThKg4xoCaSm6uoN0ZF8dqIA6K52hGjV8ExCWjjDGhoFaQBoUUaUFAG3/tHrS62h3P67NN99t5n+H6e5zxdtWpV1Vt776631lq1a6eqkCQJ4A6TDkCStHCYFCRJPZOCJKlnUpAk9UwKkqSeSUGS1DMpSNNI8sok7x7j/s5PcsAQ9Q5Isn4r9vOOJH+zpeuPS5LHJvnepONYjkwKS1iSnw38/SrJjQPzh48pht2TnJLkiiTXJ/lukhOS/MYI9/n5JH+6Nduoqr+vqqG2keT4JKdtZvmnkrx2mvJDk/w4ybZV9VtV9fmtCHm6/T43yZcHy6rq+VX1uvncT9vX8Ulubp+tjUn+I8lj5rB+Jbn/QJxfqqoHznecmp1JYQmrqjtt+gN+BDx1oOz9o95/krsB/wnsBDymqu4MPBHYBbjfqPe/mbi2HfMuVwNHJMmU8ucA76+qW8Ycz6ic3j5ruwGfA86YcDzaElXl3zL4Ay4Gfh/YHvgp8NCBZXcHbgBWAAcA64FXAj9p6x0+UHcH4H/RJZkrgXcAO82wz9cD3wbusJm4fgf4KnBt+/d3BpZ9Hngd8O/A9cD/A3Zry3YETgOuBja2de8B/B1wK/AL4GfAP7X6BbwAuAj4YSt7C3ApcB1wHvDYgX0fD5zWple29Y9qx/0T4FVt2ZOAm4Cb2/6+Oc0x7tSO7/cGyu7aYnz44Psz8Bq/Gbi8/b0Z2KEtOwBYP7Cd44Dvt9fnAuDprfzBbfu3trg2tvL3Aq8fWP95wLr2mVgD3HNgWQHPb6/ZRuBtQGZ4H/vXq83v09Zf0eYfRXeBsBG4AvgnYPu27Iut7s9brH88zXE+mO7zsBE4HzhkYNlB7divBy4DXjrp/2+L+c+WwjJTVTcBHwKOGCh+NnBWVW1o879Jd7W3B92J8OQkm5ryJwIPAPYF7t/qvGaG3f0+8C9V9avpFraWxJnAW4FdgTcBZybZdaDanwBH0yWu7YGXtvKjgJ2Bvdq6zwdurKpXAV8CXlhdi+iFA9t6GvBouhMWdIlkX+BuwAeAM5LsOMOxAPw34IHAgcBrkjy4qj4F/D3tKrmqHj51paq6EfgwcORA8WHAd6vqm9Ps51XA/i22h9OdUF89Q0zfBx5L91qcAJyWZPequpDuNfnPFtcuU1dM8gTgDS2W3YFL6D4bg54CPBJ4WKv3BzPEMbjd7duxXg1c04pvBV5E97l6DN1r+BcAVfV7rc7DW6ynT9nedsC/0l0U3B34S+D9A5/JU4A/q64l+hDg7Nli1MxMCsvTauDZA90ZzwHeN6XO31TVL6vqC3Qn7sNa/WOBF1XVT6vqeroT4rNm2M+udFeFMzkYuKiq3ldVt1TVB4HvAk8dqPN/quq/Bk6s+7bym9v2719Vt1bVeVV13SzH/YYW940AVXVaVV3d9v1Guiv0zfVjn1BVN7YT+TfpTtjDWg08cyDpHNnKpnM48Nqquqol6hPo3qPbqaozquryqvpVO5leRJdEhnE48J6q+lpV/RJ4BfCYJCsH6pxYVRur6kd0XUL7bmZ7hyXZCNxI1wJ5ZrWusfb+nNNe64uBdwKPGzLO/YE7tVhuqqqzgU/QXcxA91nYJ8ldquqaqvrakNvVNEwKy1BVnUvXXXRAkgfRXfGvGahyTVX9fGD+EuCedN1LdwTOa4OJG4FPtfLpXE13BTqTe7ZtD7qErvWxyY8Hpm+gOzlAl8Q+DXwoyeVJ/me7otycSwdnkrw0yYVJrm3HsjPdlexMZoplVlX1Zbpup6c
luR/difsDM1Sf+rpsev1vJ8mRSb4x8H48hM0fw4z7qaqf0b1nw7z+0/lwa5HcA/gO8NsDcT4gySfawPp1dBcTc4nz0iktzsHPyTPoupAuSfKFuQxw6/ZMCsvXaroupOcAH6mqXwwsu+uUu4PuRde3/RO6q8Dfqqpd2t/O1Q0uTuezwNOTzPQ5uxy495Sye9H1C29WVd1cVSdU1T504xJP4bbumZke/duXJ3ks8DK6LpG7tpPZtcDUweBhDPuo4VNbjEcAn66qK2eoN/V12fT6/5ok9wbeBbwQ2LUdw3e47Rhmi+vX9tPe810Z4vXfnKr6CV2L8vgkmy4K3k7XCty7qu5CN2Y17Gt9ObDXlM9R/zmpqq9W1aF0XUsfo2tRaguZFJav04Cn052gTp1m+QlJtm8nz6cAZ7QrtXcBJyW5O0CSPZLM1M/8JuAuwOp2AttU/01JHgZ8EnhAkj9Jsm2SP6br7//EbMEneXyShybZhm6g+GZg05XklcB9Z9nEnYFbgA3Atkle02LdElcCKzeT/DY5lW6c5XnM3HUE8EHg1UlWJNmNbsxmultef4PuxL8BIMnRdC2Fwbj2bH38M+3n6CT7JtmB7ur93Na9s1Wq6nt0LbmXtaI7071PP2ut0z+fssrm3rNNLduXJdmufZ/jqXStxO2THJ5k56q6ue1j2jEsDceksExV1aXA1+hOKl+asvjHdAOElwPvB55fVd9ty15Od7fKOa0b4LPM0A9fVT+lu4q/GTg3yfXAWXRX5Ouq6mq6hPMSum6LlwFPaVeas/lN4CN0J4ELgS9w27jIW+j6769J8tYZ1v80XdfXf9F1RfyCKd1Lc7Dp1surk8zYn91Otv9BdzJfM1M9uru21gLfort762utbOr2LgDeSHdXz5XAQ+nu1NrkbLo7dX6c5HavaVV9Fvgb4KN0Yz/3Y+bxoS3xj8Cx7QLipXQ3DVxPd2Fx+pS6x9NdPGxMctiUOG+iSwJPpmut/jNw5MBn8jnAxe3z+Hy6sRJtoVT5IzvLVZL3AJdX1asHyg6gu7Vwz0nFJWlyxv0lHi0Q7Q6TPwQeMeFQJC0gdh8tQ0leRzcg+Y9V9cNJxyNp4bD7SJLUs6UgSeot6jGF3XbbrVauXDnpMCRpUTnvvPN+UlXTful0USeFlStXsnbt2kmHIUmLSpKpTxLo2X0kSeqZFCRJPZOCJKlnUpAk9UwKkqSeSUGS1DMpSJJ6JgVJUs+kIEnqLepvNG+Nlced2U9ffOLBE4xEkhYOWwqSpJ5JQZLUMylIknomBUlSz6QgSeqZFCRJPZOCJKlnUpAk9UwKkqSeSUGS1DMpSJJ6JgVJUs+kIEnqmRQkST2TgiSpZ1KQJPVMCpKknklBktQzKUiSeiYFSVLPpCBJ6o08KSTZJsnXk3yizd8nyblJ1iU5Pcn2rXyHNr+uLV856tgkSb9uHC2FvwIuHJj/B+Ckqro/cA1wTCs/BrimlZ/U6kmSxmikSSHJnsDBwLvbfIAnAB9pVVYDT2vTh7Z52vIDW31J0piMuqXwZuBlwK/a/K7Axqq6pc2vB/Zo03sAlwK05de2+r8mybFJ1iZZu2HDhhGGLknLz8iSQpKnAFdV1Xnzud2qOrmqVlXVqhUrVsznpiVp2dt2hNv+XeCQJAcBOwJ3Ad4C7JJk29Ya2BO4rNW/DNgLWJ9kW2Bn4OoRxidJmmJkLYWqekVV7VlVK4FnAWdX1eHA54BntmpHAR9v02vaPG352VVVo4pPknR7k/iewsuBFydZRzdmcEorPwXYtZW/GDhuArFJ0rI2yu6jXlV9Hvh8m/4B8Khp6vwC+KNxxCNJmp7faJYk9UwKkqSeSUGS1DMpSJJ6JgVJUs+kIEnqmRQkST2TgiSpZ1KQJPVMCpKknklBktQzKUiSeiYFSVLPpCBJ6pkUJEk9k4IkqWdSkCT1TAqSpJ5JQZLUMylIknomBUlSz6QgSeqZFCRJPZOCJKlnUpAk9UwKkqSeSUGS1DM
pSJJ6JgVJUs+kIEnqmRQkST2TgiSpZ1KQJPVMCpKknklBktQzKUiSeiNLCkl2TPKVJN9Mcn6SE1r5fZKcm2RdktOTbN/Kd2jz69rylaOKTZI0vVG2FH4JPKGqHg7sCzwpyf7APwAnVdX9gWuAY1r9Y4BrWvlJrZ4kaYxGlhSq87M2u137K+AJwEda+WrgaW360DZPW35gkowqPknS7Y10TCHJNkm+AVwFfAb4PrCxqm5pVdYDe7TpPYBLAdrya4Fdp9nmsUnWJlm7YcOGUYYvScvOSJNCVd1aVfsCewKPAh40D9s8uapWVdWqFStWbO3mJEkDxnL3UVVtBD4HPAbYJcm2bdGewGVt+jJgL4C2fGfg6nHEJ0nqjPLuoxVJdmnTOwFPBC6kSw7PbNWOAj7epte0edrys6uqRhWfJOn2tp29yhbbHVidZBu65PPhqvpEkguADyV5PfB14JRW/xTgfUnWAT8FnjXC2CRJ0xgqKSR5aFV9ey4brqpvAY+YpvwHdOMLU8t/AfzRXPYhSZpfw3Yf/XP7ItpfJNl5pBFJkiZmqKRQVY8FDqcbCD4vyQeSPHGkkUmSxm7ogeaqugh4NfBy4HHAW5N8N8kfjio4SdJ4DZUUkjwsyUl0dw89AXhqVT24TZ80wvgkSWM07N1H/xt4N/DKqrpxU2FVXZ7k1SOJTJI0dsMmhYOBG6vqVoAkdwB2rKobqup9I4tOkjRWw44pfBbYaWD+jq1MkrSEDJsUdhx44ilt+o6jCUmSNCnDJoWfJ9lv00yS3wZu3Ex9SdIiNOyYwl8DZyS5HAjwm8AfjyooSdJkDJUUquqrSR4EPLAVfa+qbh5dWJKkSZjLA/EeCaxs6+yXhKo6dSRRSZImYtgH4r0PuB/wDeDWVlyASUGSlpBhWwqrgH38fQNJWtqGvfvoO3SDy5KkJWzYlsJuwAVJvgL8clNhVR0ykqgkSRMxbFI4fpRBSJIWhmFvSf1CknsDe1fVZ5PcEdhmtKFJksZt2EdnPw/4CPDOVrQH8LERxSRJmpBhB5pfAPwucB30P7hz91EFJUmajGGTwi+r6qZNM0m2pfuegiRpCRk2KXwhySuBndpvM58B/OvowpIkTcKwSeE4YAPwbeDPgE/S/V6zJGkJGfbuo18B72p/S87K487spy8+8eAJRiJJkzXss49+yDRjCFV133mPSJI0MXN59tEmOwJ/BNxt/sORJE3SUGMKVXX1wN9lVfVmwH4WSVpihu0+2m9g9g50LYe5/BaDJGkRGPbE/saB6VuAi4HD5j0aSdJEDXv30eNHHYgkafKG7T568eaWV9Wb5iccSdIkzeXuo0cCa9r8U4GvABeNIihJ0mQMmxT2BParqusBkhwPnFlVR4wqMEnS+A37mIt7ADcNzN/UyiRJS8iwLYVTga8k+b9t/mnA6pFEJEmamGHvPvq7JP8GPLYVHV1VXx9dWJKkSRi2+wjgjsB1VfUWYH2S+4woJknShAz7c5x/C7wceEUr2g44bZZ19kryuSQXJDk/yV+18rsl+UySi9q/d23lSfLWJOuSfGvKt6glSWMwbEvh6cAhwM8Bqupy4M6zrHML8JKq2gfYH3hBkn3ofpvhrKraGzirzQM8Gdi7/R0LvH0OxyFJmgfDJoWbqqpoj89O8huzrVBVV1TV19r09cCFwB7Aodw2SL2abtCaVn5qdc4Bdkmy+7AHIknaesMmhQ8neSfdifp5wGeZww/uJFkJPAI4F7hHVV3RFv2Y225t3QO4dGC19a1s6raOTbI2ydoNGzYMG4IkaQiz3n2UJMDpwIOA64AHAq+pqs8Ms4MkdwI+Cvx1VV3Xba5TVZXkdj/eszlVdTJwMsCqVavmtK4kafNmTQrtxP3JqnooMFQi2CTJdnQJ4f1V9S+t+Moku1fVFa176KpWfhmw18Dqe7YySdKYDNt99LUkj5zLhlsL4xTgwikPzFsDHNWmjwI+PlB+ZLsLaX/g2oFuJknSGAz7jeZHA0c
kuZjuDqTQNSIetpl1fhd4DvDtJN9oZa8ETqQbozgGuITbfpfhk8BBwDrgBuDo4Q9DkjQfNpsUktyrqn4E/MFcN1xVX6ZLHtM5cJr6BbxgrvuRJM2f2VoKH6N7OuolST5aVc8YQ0ySpAmZbUxh8Er/vqMMRJI0ebMlhZphWpK0BM3WffTwJNfRtRh2atNw20DzXUYanSRprDabFKpqm3EFIkmavLk8OluStMSZFCRJPZOCJKlnUpAk9UwKkqSeSUGS1DMpSJJ6JgVJUs+kIEnqmRQkST2TgiSpZ1KQJPVMCpKknklBktQzKUiSeiYFSVLPpCBJ6pkUJEm92X6jedlZedyZ/fTFJx48wUgkafxsKUiSeiYFSVLPpCBJ6pkUJEk9k4IkqWdSkCT1TAqSpJ5JQZLUMylIknomBUlSz6QgSeqZFCRJvZElhSTvSXJVku8MlN0tyWeSXNT+vWsrT5K3JlmX5FtJ9htVXJKkmY2ypfBe4ElTyo4DzqqqvYGz2jzAk4G929+xwNtHGJckaQYjSwpV9UXgp1OKDwVWt+nVwNMGyk+tzjnALkl2H1VskqTpjXtM4R5VdUWb/jFwjza9B3DpQL31rex2khybZG2StRs2bBhdpJK0DE1soLmqCqgtWO/kqlpVVatWrFgxgsgkafkad1K4clO3UPv3qlZ+GbDXQL09W5kkaYzGnRTWAEe16aOAjw+UH9nuQtofuHagm0mSNCYj+43mJB8EDgB2S7Ie+FvgRODDSY4BLgEOa9U/CRwErANuAI4eVVySpJmNLClU1bNnWHTgNHULeMGoYpEkDcdvNEuSeiYFSVLPpCBJ6pkUJEk9k4IkqTeyu4+WgpXHndlPX3ziwROMRJLGw5aCJKlnUpAk9UwKkqSeYwpDGhxfAMcYJC1NthQkST2TgiSpZ1KQJPVMCpKknklBktQzKUiSeiYFSVLPpCBJ6pkUJEk9k4IkqWdSkCT1TAqSpJ4PxNtCUx+Qt4kPypO0mNlSkCT1bClI0iIxjp8ItqUgSerZUhihcWR1SZpPJoV5NtMAtCQtBnYfSZJ6JgVJUs/uowlwrEHSQmVLQZLUs6UwJlszAG3LQtK4mBQmbKYT/jBJxGQhab6ZFJY4E4ekuTApLBFLoWWx0OOTlgOTwgKyEL745olZWt4WVFJI8iTgLcA2wLur6sQJh7TgTCpxmCyWH9/z5WnBJIUk2wBvA54IrAe+mmRNVV0w2ciWpvm6G2pYMw2iz/VkM9d15zqQvzXbnKnOsNud6z7GuZ1x7HchxzrJBDnuC8FU1Vh3OJMkjwGOr6o/aPOvAKiqN8y0zqpVq2rt2rVbtL+F0FWj5WPY5DHK7QyTmMf9/2JrYhpFkp+rhfLazVWS86pq1bTLFlBSeCbwpKr60zb/HODRVfXCKfWOBY5tsw8EvreFu9wN+MkWrrtYeczLg8e8PGzNMd+7qlZMt2DBdB8Nq6pOBk7e2u0kWTtTplyqPOblwWNeHkZ1zAvpMReXAXsNzO/ZyiRJY7KQksJXgb2T3CfJ9sCzgDUTjkmSlpUF031UVbckeSHwabpbUt9TVeePcJdb3QW1CHnMy4PHvDyM5JgXzECzJGnyFlL3kSRpwkwKkqTekk8KSZ6U5HtJ1iU5bprlOyQ5vS0/N8nKCYQ5r4Y45hcnuSDJt5KcleTek4hzPs12zAP1npGkkiz62xeHOeYkh7X3+vwkHxh3jPNtiM/2vZJ8LsnX2+f7oEnEOV+SvCfJVUm+M8PyJHlrez2+lWS/rd5pVS3ZP7oB6+8D9wW2B74J7DOlzl8A72jTzwJOn3TcYzjmxwN3bNN/vhyOudW7M/BF4Bxg1aTjHsP7vDfwdeCubf7uk457DMd8MvDnbXof4OJJx72Vx/x7wH7Ad2ZYfhDwb0CA/YFzt3afS72l8ChgXVX9oKpuAj4EHDqlzqHA6jb9EeDAJBljjPNt1mOuqs9V1Q1t9hy674QsZsO8zwCvA/4B+MU4gxuRYY7
5ecDbquoagKq6aswxzrdhjrmAu7TpnYHLxxjfvKuqLwI/3UyVQ4FTq3MOsEuS3bdmn0s9KewBXDowv76VTVunqm4BrgV2HUt0ozHMMQ86hu5KYzGb9Zhbs3qvqloqD70a5n1+APCAJP+e5Jz2FOLFbJhjPh44Isl64JPAX44ntImZ6//3WS2Y7ylo/JIcAawCHjfpWEYpyR2ANwHPnXAo47YtXRfSAXStwS8meWhVbZxkUCP2bOC9VfXG9pDN9yV5SFX9atKBLRZLvaUwzKMz+jpJtqVrcl49luhGY6jHhST5feBVwCFV9csxxTYqsx3znYGHAJ9PcjFd3+uaRT7YPMz7vB5YU1U3V9UPgf+iSxKL1TDHfAzwYYCq+k9gR7oHxy1V8/54oKWeFIZ5dMYa4Kg2/Uzg7GojOIvUrMec5BHAO+kSwmLvZ4ZZjrmqrq2q3apqZVWtpBtHOaSqtuy56wvDMJ/tj9G1EkiyG1130g/GGON8G+aYfwQcCJDkwXRJYcNYoxyvNcCR7S6k/YFrq+qKrdngku4+qhkenZHktcDaqloDnELXxFxHN6DzrMlFvPWGPOZ/BO4EnNHG1H9UVYdMLOitNOQxLylDHvOngf+e5ALgVuB/VNWibQUPecwvAd6V5EV0g87PXcwXeUk+SJfYd2vjJH8LbAdQVe+gGzc5CFgH3AAcvdX7XMSvlyRpni317iNJ0hyYFCRJPZOCJKlnUpAk9UwKkqSeSUGS1DMpSJJ6/x+eh/5eYN+e+gAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "typeConstDF['violation_ratio'].plot.hist(bins=100).set_title(\"Type Constraint Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 13, "id": "sufficient-hollywood", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Type Constraint Violation Ratios (<=0.05)')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEICAYAAACwDehOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAdiklEQVR4nO3deZwdVZ338c9Xwr4FSMMgWwOyCwSMuE00wqCIIqDIMmHRcQg86rzGUR9kUwIo4gK4jWJQHoLsy+Cg4AIoMOqwJBDZEZAgWQhNICSBsAR+zx91uigu93bf7tt163b39/163VdXnTpV9Tt3qV+dU3VvKyIwMzMDeFPVAZiZWedwUjAzs5yTgpmZ5ZwUzMws56RgZmY5JwUzM8s5KVhHk3S8pJ+2cX/3SprURL1Jkua0sJ+zJX1lsOu3i6SJkh4sYbtdkh6QtOpQb7udJP2bpG9WHcdQclJoA0lLC49XJS0rzE9uUwwbSvqZpPmSlqQP5MmSVi9xnzdK+tdWthERp0VEU9uQNFXSBX0s/42kU+qU7yvpCUljImKHiLixhZDr7feTkv5YLIuIoyPi1KHcT9rXVEkvp/fWIkl/lvSuAawfkt5SiPN/ImKboY4TOBY4LyKWlbBtJK0s6VxJi9Nr+4V+6v9Hqrc4rbdyYdnsms/s7wqrngNMlrR+Ge2ogpNCG0TEGr0P4O/APoWyC8vev6R1gf8FVgXeFRFrAnsCY4Ety95/H3GNafMupwOHSlJN+WHAhRGxvM3xlOXS9F4bB/wBuLzieF4nHXCPABom8Jr6GwxiN1OBrYDNgPcDx0jaq8H2P0iWpPZI9bcATq6pVvzMfqC3MCJeAH4NHD6IGDtTRPjRxgcwG/gnYCXgaWDHwrL1geeBLmASMAc4HngqrTe5UHdl4DtkSWYBcDawaoN9fg24G3hTH3G9G7gdeDb9fXdh2Y3AqcCfgCXA74BxadkqZB/uhcCitO4GwNeBV4AXgKXAD1P9AD4LPAQ8msq+BzwOLAZmAhML+54KXJCmu9P6R6R2PwWckJbtBbwEvJz295c6bVw1te+9hbJ1Uow7F1+fwnP8XWBeenwXWDktmwTMKWznWOCR9PzcB+yfyrdL238lxbUolZ8HfK2w/pHAw+k9cTXw5sKyAI5Oz9ki4D8BNXgd8+crzW+f1u9K87uRnSAsAuYDPwRWSstuTnWfS7EeVKed25G9HxYB9wIfLSzbO7V9CTAX+FKDGN8LPNzP52Q1smT9e+C+QXzO5gEfKMyfClzSoO5FwGmF+T2AJ2o/s33sazLwh6qOKUP9qDyA0faoOej8CPhmYdm/A79M05OA5cCZ6eD0vvRh3SYtPysdPNYF1gR+CXyjwT5vAU7uI6Z1gWfSh3AMcEiaXy8tv5HsgLc12YH1RuD0tOyotO/VgBWAtwFrFdb715p9BXBd2ueqqexQYL207y8CTwCrpGVTeWNSOCfFsTPwIrBdbd0+2noO8NPC/FHArAa
vzynpuVufLFH/GTi18PoUD5afAN5M1vs+KL1WG6ZlnwT+WBPHeaSkAOxOluB2Ta/1D4Cba56zX5H17DYFeoC9GrSv+HytBJyetj0mlb0NeGd6rruB+4HP1+zrLYX5vJ3AimSJ6/i07d3JEkDve3I+KaGTJdtdG8T4WeCaBsvelV6jZ9L7ZDKFkx2yz8yiBo+7CvsOYIPCegcAdzfY51+Agwrz49L6ve//2WQnXj1kJ0Q716y/K/B01ceWoXp4+Kha04FDCsMZhwE/r6nzlYh4MSJuAq4BDkz1pwD/ERFPR8QS4DTg4Ab7WY/sA9vIh4GHIuLnEbE8Ii4GHgD2KdT5fxHx18jGgC8Dxqfyl9P23xIRr0TEzIhY3E+7v5HiXgYQERdExMK07zPIDox9jWOfHBHLIuIvZB/onfvZX9F04ABJq6T5w1NZPZOBUyLiyYjoIRtSOKxexYi4PCLmRcSrEXEp2Vn9bk3GNBk4NyLuiIgXgeOAd0nqLtQ5PSIWRcTfyYaExvexvQMlLQKWkfVADog0NJZen1vScz0b+AnZCUcz3gmskWJ5KSJ+T5asDknLXwa2l7RWRDwTEXc02M5YsmSSk3SgpAfIkuWjZD3oPSPiwihcd4iIz0TE2AaPnVK1NdLfZwu7eJbs5KmeNerUpVB/MlkC3Yzsuf+tpLGF+kuAtRtse9hxUqhQRNxKNlw0SdK2wFvIzv57PRMRzxXmHyM7G+0iOzOfmS4mLgJ+k8rrWQhs2Ecob07bLnoM2Kgw/0Rh+nle++D9HPgtcImkeZK+JWnFPvYF2VBRTtKXJN0v6dnUlrXJztYaaRRLvyLij2RnzvtJ2pLswH1Rg+q1z0vv8/8Gkg6XNKvweryVvtvQcD8RsZTsNWvm+a/nsogYSzaMdw9Z76A3zq0l/ar3oirZycRA4nw8Il4tlBXfJx8nG0J6TNJNfVzgfoY3HqA3TtuZRZbon2Dwlqa/axXK1qImEdXUr61Lb/2I+FM6CXk+Ir5B1iuZWKi/Jq9PKsOak0L1ppMNnxwGXBHZhate69TcHbQp2VjpU2RngTsUzpLWjuziYj3XA/tLavR6zyM7CyralGxcuE8R8XJEnBwR25Ndl/gIr110a/QTvHm5pInAMcCBwDrpYPYsUHsxuBnN/uTv+SnGQ4HfRsSCBvVqn5fe5/91JG1GNuTxObIhh7FkB+PeNvQX1+v2k17z9Wji+e9LRDxF1qOcKqn3pODHZL3ArSJiLbKhoGaf63nAJjXvo/x9EhG3R8S+ZMNtvyDrUdZzF9lQZDHWM8mSwg3ACcAcSWdJ2qVYL93Ku7TB4960rWfIesbFHuTOZNdA6rm3Tt0FEbGwQf3g9c/ZdmSJbERwUqjeBcD+ZAeo8+ssP1nSSung+RHg8nSmdg5wVu+tcJI2SndR1HMm2dnP9HQA661/pqSdgGuBrSX9s6Qxkg4iu0D5q/6Cl/R+STtKWoHsQvHLQO+Z5AKyOzn6sibZtZMeYIykr/L6s7aBWAB095H8ep1PdrH/SBoPHQFcDJyo7J76ccBXqX/HzOpkB4oeAEmfIuspFOPaWNJKfeznU5LGpztzTgNuTcM7LYmIB8l6csekojXJXqelqXf6f2pW6es16+3ZHiNpRWXf59iHrJe4kqTJktaOiJfTPl5tsJ3bgLGSij0hImJxREyLiHeTDWm9APxS0g2FOkdH4W6+mscOhc2dT/barZPaeSTZ0FQ95wOflrR9GhY6sbeupE0lvSe1bxVJ/5esZ/WnwvrvI7sDaURwUqhYRDwO3EF2UPmfmsVPkHW15wEXAkdHxANp2ZfJLvrdkoYBrqfBOHxEPE12Fv8ycKukJWRnZM+S3QWykCzhfJFs2OIY4CPpTLM//wBcQXYQuB+4ideui3yPbPz+GUnfb7D+b8mGvv5KNhTxAjXDSwPQe+vlQkmNxrNJB9s/kx3Mr25Uj+yurRlkZ7Z3k71OX6uzvfuAM8ju6lkA7MjrDxq/JzsbfUL
SG57TiLge+ApwJdkZ7pY0vj40GN8GpqQTiC8B/0w2NHIOcGlN3alkJw+LJB1YE+dLZEngQ2S91R8Bhxfek4cBs9P78Wiysfg3SNs5j+xEqK6IeDAijiPriZzYdEtfcxLZzRGPkb0nvx0Rv4H8QL9U0qZpX78BvkV2veDvaZ2T0nbWJOtdPUPWI9oL+FBvLyJdm9qbvk8uhhVFNNvjtrJIOheYFxEnFsomkd1FsnFVcZmVRVIX2UnQLlHSF9jaQdK/AZtExDH9Vh4m2v3lIauR7jD5GLBLP1XNRox0N9e2VcfRqoj4QdUxDDUPH1VI0qlkFyS/HRGPVh2PmZmHj8zMLOeegpmZ5Yb1NYVx48ZFd3d31WGYmQ0rM2fOfCoi6n7ZdVgnhe7ubmbMmFF1GGZmw4qk2l8wyHn4yMzMck4KZmaWc1IwM7Ock4KZmeWcFMzMLOekYGZmOScFMzPLOSmYmVnOScHMzHLD+hvNreo+9pp8evbpH64wEjOzzuCegpmZ5ZwUzMws56RgZmY5JwUzM8s5KZiZWc5JwczMck4KZmaWc1IwM7Ock4KZmeWcFMzMLFdaUpB0rqQnJd1TKLtU0qz0mC1pVirvlrSssOzssuIyM7PGyvzto/OAHwLn9xZExEG905LOAJ4t1H8kIsaXGI+ZmfWjtKQQETdL6q63TJKAA4Hdy9q/mZkNXFXXFCYCCyLioULZ5pLulHSTpImNVpQ0RdIMSTN6enrKj9TMbBSpKikcAlxcmJ8PbBoRuwBfAC6StFa9FSNiWkRMiIgJXV1dbQjVzGz0aHtSkDQG+BhwaW9ZRLwYEQvT9EzgEWDrdsdmZjbaVdFT+CfggYiY01sgqUvSCml6C2Ar4G8VxGZmNqqVeUvqxcD/AttImiPp02nRwbx+6AjgvcBd6RbVK4CjI+LpsmIzM7P6yrz76JAG5Z+sU3YlcGVZsZiZWXP8jWYzM8s5KZiZWc5JwczMck4KZmaWc1IwM7Ock4KZmeWcFMzMLOekYGZmOScFMzPLOSmYmVnOScHMzHJOCmZmlnNSMDOznJOCmZnlnBTMzCznpGBmZjknBTMzyzkpmJlZrsz/0XyupCcl3VMomypprqRZ6bF3Ydlxkh6W9KCkD5YVl5mZNVZmT+E8YK865WdFxPj0uBZA0vbAwcAOaZ0fSVqhxNjMzKyO0pJCRNwMPN1k9X2BSyLixYh4FHgY2K2s2MzMrL4qril8TtJdaXhpnVS2EfB4oc6cVGZmZm3U7qTwY2BLYDwwHzhjoBuQNEXSDEkzenp6hjg8M7PRra1JISIWRMQrEfEqcA6vDRHNBTYpVN04ldXbxrSImBARE7q6usoN2MxslGlrUpC0YWF2f6D3zqSrgYMlrSxpc2Ar4LZ2xmZmZjCmrA1LuhiYBIyTNAc4CZgkaTwQwGzgKICIuFfSZcB9wHLgsxHxSlmxmZlZfaUlhYg4pE7xz/qo/3Xg62XFY2Zm/fM3ms3MLOekYGZmOScFMzPLOSmYmVnOScHMzHJOCmZmlnNSMDOznJOCmZnlnBTMzCznpGBmZjknBTMzyzkpmJlZzknBzMxyTgpmZpZzUjAzs5yTgpmZ5ZwUzMws56RgZmY5JwUzM8uVlhQknSvpSUn3FMq+LekBSXdJukrS2FTeLWmZpFnpcXZZcZmZWWNl9hTOA/aqKbsOeGtE7AT8FTiusOyRiBifHkeXGJeZmTVQWlKIiJuBp2vKfhcRy9PsLcDGZe3fzMwGrsprCv8C/Lowv7mkOyXdJGlio5UkTZE0Q9KMnp6e8qM0MxtFKkkKkk4AlgMXpqL5wKYRsQvwBeAiSWvVWzcipkXEhIiY0NXV1Z6AzcxGibYnBUmfBD4CTI6IAIiIFyNiYZqeCTwCbN3u2MzMRru2JgVJewHHAB+NiOcL5V2SVkjTWwBbAX9rZ2xmZgZjytqwpIuBScA4SXOAk8juNloZuE4SwC3pTqP3AqdIehl4FTg6Ip6uu2EzMytNaUk
hIg6pU/yzBnWvBK4sKxYzM2tOU8NHknYsOxAzM6tes9cUfiTpNkmfkbR2qRGZmVllmkoKETERmAxsAsyUdJGkPUuNzMzM2q7pu48i4iHgRODLwPuA76ffMfpYWcGZmVl7NXtNYSdJZwH3A7sD+0TEdmn6rBLjMzOzNmr27qMfAD8Fjo+IZb2FETFP0omlRGZmZm3XbFL4MLAsIl4BkPQmYJWIeD4ifl5adGZm1lbNXlO4Hli1ML9aKjMzsxGk2aSwSkQs7Z1J06uVE5KZmVWl2aTwnKRde2ckvQ1Y1kd9MzMbhpq9pvB54HJJ8wAB/wAcVFZQZmZWjaaSQkTcLmlbYJtU9GBEvFxeWGZmVoWB/CDe24HutM6ukoiI80uJyszMKtFUUpD0c2BLYBbwSioOwEnBzGwEabanMAHYvvc/pZmZ2cjU7N1H95BdXDYzsxGs2Z7COOA+SbcBL/YWRsRHS4nKzMwq0WxSmFpmEGZm1hma/X8KNwGzgRXT9O3AHf2tJ+lcSU9KuqdQtq6k6yQ9lP6uk8ol6fuSHpZ0V/HLcmZm1h7N/nT2kcAVwE9S0UbAL5pY9Txgr5qyY4EbImIr4IY0D/AhYKv0mAL8uJnYzMxs6DR7ofmzwHuAxZD/w531+1spIm4Gnq4p3heYnqanA/sVys+PzC3AWEkbNhmfmZkNgWaTwosR8VLvjKQxZN9TGIwNImJ+mn4C2CBNbwQ8Xqg3J5WZmVmbNJsUbpJ0PLBq+t/MlwO/bHXn6XsPA0oukqZImiFpRk9PT6shmJlZQbNJ4VigB7gbOAq4luz/NQ/Ggt5hofT3yVQ+F9ikUG/jVPY6ETEtIiZExISurq5BhmBmZvU0e/fRqxFxTkR8IiIOSNODHT66GjgiTR8B/Heh/PB0F9I7gWcLw0xmZtYGzf720aPUGeaJiC36We9iYBIwTtIc4CTgdOAySZ8GHgMOTNWvBfYGHgaeBz7VXBPMzGyoDOS3j3qtAnwCWLe/lSLikAaL9qhTN8jucjIzs4o0O3y0sPCYGxHfBT5cbmhmZtZuzQ4fFb9d/CaynsNA/heDmZkNA80e2M8oTC8n+8mLA+tXNTOz4arZf8f5/rIDMTOz6jU7fPSFvpZHxJlDE46ZmVVpIHcfvZ3suwQA+wC3AQ+VEZSZmVWj2aSwMbBrRCwBkDQVuCYiDi0rMDMza79mf+ZiA+ClwvxLvPZDdmZmNkI021M4H7hN0lVpfj9e+/lrMzMbIZq9++jrkn4NTExFn4qIO8sLy8zMqtDs8BHAasDiiPgeMEfS5iXFZGZmFWn233GeBHwZOC4VrQhcUFZQZmZWjWZ7CvsDHwWeA4iIecCaZQVlZmbVaDYpvFT8L2mSVi8vJDMzq0qzSeEyST8Bxko6ErgeOKe8sMzMrAr93n0kScClwLbAYmAb4KsRcV3JsZmZWZv1mxQiIiRdGxE7Ak4EZmYjWLPDR3dIenupkZiZWeWa/UbzO4BDJc0muwNJZJ2IncoKzMzM2q/PpCBp04j4O/DBodqhpG3IrlH02gL4KjAWOBLoSeXHR8S1Q7VfMzPrX389hV+Q/TrqY5KujIiPt7rDiHgQGA8gaQVgLnAV8CngrIj4Tqv7MDOzwenvmoIK01uUsP89gEci4rEStm1mZgPUX1KIBtND5WDg4sL85yTdJelcSevUW0HSFEkzJM3o6empV8XMzAapv6Sws6TFkpYAO6XpxZKWSFrcyo4lrUT20xmXp6IfA1uSDS3NB86ot15ETIuICRExoaurq5UQzMysRp/XFCJihRL3/SHgjohYkPa1oHeBpHOAX5W4bzMzq2MgP5091A6hMHQkacPCsv2Be9oekZnZKNfs9xSGVPpBvT2BowrF35I0nuzaxeyaZWZm1gaVJIWIeA5Yr6bssCpiMTOz11Q5fGRmZh3GScHMzHJOCmZmlnNSMDOznJOCmZnlnBTMzCznpGBmZjknBTMzyzkpmJlZzknBzMxyTgpmZpZzUjA
zs5yTgpmZ5ZwUzMws56RgZmY5JwUzM8s5KZiZWc5JwczMck4KZmaWq+R/NANImg0sAV4BlkfEBEnrApcC3cBs4MCIeKaqGM3MRpuqewrvj4jxETEhzR8L3BARWwE3pHkzM2uTqpNCrX2B6Wl6OrBfdaGYmY0+VSaFAH4naaakKalsg4iYn6afADaoXUnSFEkzJM3o6elpV6xmZqNCZdcUgH+MiLmS1geuk/RAcWFEhKSoXSkipgHTACZMmPCG5WZmNniV9RQiYm76+yRwFbAbsEDShgDp75NVxWdmNhpVkhQkrS5pzd5p4APAPcDVwBGp2hHAf1cRn5nZaFXV8NEGwFWSemO4KCJ+I+l24DJJnwYeAw6sKD4zs1GpkqQQEX8Ddq5TvhDYo/0RmZkZdN4tqWZmViEnBTMzyzkpmJlZzknBzMxyTgpmZpZzUjAzs5yTgpmZ5ZwUzMws56RgZmY5JwUzM8s5KZiZWc5JwczMck4KZmaWc1IwM7Ock4KZmeWcFMzMLOekYGZmOScFMzPLtT0pSNpE0h8k3SfpXkn/nsqnSporaVZ67N3u2MzMRrsq/kfzcuCLEXGHpDWBmZKuS8vOiojvVBCTmZlRQVKIiPnA/DS9RNL9wEbtjsPMzN6o0msKkrqBXYBbU9HnJN0l6VxJ6zRYZ4qkGZJm9PT0tCtUM7NRobKkIGkN4Erg8xGxGPgxsCUwnqwncUa99SJiWkRMiIgJXV1d7QrXzGxUqCQpSFqRLCFcGBH/BRARCyLilYh4FTgH2K2K2MzMRrMq7j4S8DPg/og4s1C+YaHa/sA97Y7NzGy0q+Luo/cAhwF3S5qVyo4HDpE0HghgNnBUBbFZH7qPvSafnn36hyuMxMzKUsXdR38EVGfRte2OxczMXq+KnkJH8lmwmZmTwojhpGZmQ8G/fWRmZjn3FPrRaWfgnRaPmY0sTgp1FA+8jcqLB+QyDtSdcvBv9FyY2cjkpDBIPlia2UjkpFCBqnoBVfY+BrrvdvbKzOw1TgolauVA2G5DddA2s+HNdx+ZmVnOPYU2aebi9UDX7UTN9CBG8oV5s+HOSWGIDZcD+HCJsyxOImb1OSmMcIM5+A90nbITTCvxdMoB3xfObbhwUhjGWhmSGi3KOuj6YG4jlZOClWag10vKPri2I1kOVXta2Y4TlrXCScGGrTIO8n1ts6oD7Eg4yI+ENowWTgrWMaoa9mp2vyN1uM4H7Pbq9OfbScFsGOmUA0onDG81SsZlDNt1yvPeDk4KZiUpu2dRxjWbwRz8ykgQrRzwy/6uTLNDjFV9Z6dVTgpmHaKdw1CddhNAWVr5cuhQvh7DaYix437mQtJekh6U9LCkY6uOx8xsNOmonoKkFYD/BPYE5gC3S7o6Iu6rNjKz0a3Vi/Flr9sp2jk0WFbvraOSArAb8HBE/A1A0iXAvoCTglmLfNDtbJ3Stk5LChsBjxfm5wDvKFaQNAWYkmaXSnqwhf2NA55qYf3hZrS1F9zm0WLUtVnfbKnNmzVa0GlJoV8RMQ2YNhTbkjQjIiYMxbaGg9HWXnCbRwu3eeh02oXmucAmhfmNU5mZmbVBpyWF24GtJG0uaSXgYODqimMyMxs1Omr4KCKWS/oc8FtgBeDciLi3xF0OyTDUMDLa2gtu82jhNg8RRUQZ2zUzs2Go04aPzMysQk4KZmaWG5FJob+fypC0sqRL0/JbJXUXlh2Xyh+U9MG2Bt6CwbZZ0nqS/iBpqaQftj3wFrTQ5j0lzZR0d/q7e9uDH6QW2rybpFnp8RdJ+7c9+EFq5fOclm+a3t9falvQLWjhNe6WtKzwOp89qAAiYkQ9yC5QPwJsAawE/AXYvqbOZ4Cz0/TBwKVpevtUf2Vg87SdFapuU8ltXh34R+Bo4IdVt6VNbd4FeHOafiswt+r2tKHNqwFj0vSGwJO98538aKXNheVXAJcDX6q6PSW/xt3APa3GMBJ7CvlPZUTES0DvT2UU7QtMT9NXAHtIUiq/JCJ
ejIhHgYfT9jrdoNscEc9FxB+BF9oX7pBopc13RsS8VH4vsKqkldsSdWtaafPzEbE8la8CDJc7TFr5PCNpP+BRstd5OGipvUNhJCaFej+VsVGjOumD8iywXpPrdqJW2jxcDVWbPw7cEREvlhTnUGqpzZLeIele4G7g6EKS6GSDbrOkNYAvAye3Ic6h0ur7enNJd0q6SdLEwQTQUd9TMGsnSTsA3wQ+UHUs7RARtwI7SNoOmC7p1xEx3HqIAzEVOCsilg7hiXQnmw9sGhELJb0N+IWkHSJi8UA2MhJ7Cs38VEZeR9IYYG1gYZPrdqJW2jxctdRmSRsDVwGHR8QjpUc7NIbkdY6I+4GlZNdTOl0rbX4H8C1Js4HPA8enL8d2skG3Nw17LwSIiJlk1ya2HmgAIzEpNPNTGVcDR6TpA4DfR3al5mrg4HR1f3NgK+C2NsXdilbaPFwNus2SxgLXAMdGxJ/aFfAQaKXNm6cDCJI2A7YFZrcn7JYMus0RMTEiuiOiG/gucFpEdPoddq28xl3K/icNkrYgO379bcARVH21vYwHsDfwV7JMeUIqOwX4aJpehexuhIfJDvpbFNY9Ia33IPChqtvSpjbPBp4mO3ucQ83dDp36GGybgROB54BZhcf6Vben5DYfRnaxdRZwB7Bf1W0pu80125jKMLj7qMXX+OM1r/E+g9m/f+bCzMxyI3H4yMzMBslJwczMck4KZmaWc1IwM7Ock4KZmeWcFMzMLOekYGZmuf8PFnLa6cWQNqYAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "typeConstDF[typeConstDF['violation_ratio'] <= 0.05].violation_ratio.plot.hist(bins=100).set_title(\"Type Constraint Violation Ratios (<=0.05)\")" ] }, { "cell_type": "code", "execution_count": 14, "id": "minor-marshall", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of constraints whose violation ratio is greater than mean :0/1465\n" ] } ], "source": [ "print(f\"No. of constraints whose violation ratio is greater than mean :{sum(typeConstDF['violation_ratio'] >= 5.286054)}/{len(typeConstDF)}\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "special-consensus", "metadata": {}, "outputs": [], "source": [ "# typeConstDF.sort_values(by=['incorrect'],ascending=False).head(5).paths.values" ] }, { "cell_type": "code", "execution_count": 16, "id": "excited-person", "metadata": {}, "outputs": [], "source": [ "# !cat ../../allConstraintsAnalysisWRemoved2/typeConstraint/normal/claims.type-constraints.instanceOf.P953.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 17, "id": "revolutionary-violence", "metadata": {}, "outputs": [], "source": [ "for key1 in typeConstViolations.keys():\n", " typeConstViolations[key1]['correct'] = typeConstViolations[key1]['instanceOf']['correct'] + typeConstViolations[key1]['subclass']['correct'] + typeConstViolations[key1]['instanceOfOrSubclass']['correct']\n", " typeConstViolations[key1]['incorrect'] = typeConstViolations[key1]['instanceOf']['incorrect'] + typeConstViolations[key1]['subclass']['incorrect'] + typeConstViolations[key1]['instanceOfOrSubclass']['incorrect']\n", " typeConstViolations[key1]['VR'] = typeConstViolations[key1]['incorrect'] / (typeConstViolations[key1]['correct'] + typeConstViolations[key1]['incorrect'])\n", " " ] }, { "cell_type": "code", "execution_count": 18, "id": "emotional-favorite", "metadata": { "scrolled": true }, "outputs": [ { "data": { 
"text/plain": [ "{'mandatory': {'instanceOf': {'correct': 46304082, 'incorrect': 795451},\n", " 'subclass': {'correct': 2064, 'incorrect': 53},\n", " 'instanceOfOrSubclass': {'correct': 233195, 'incorrect': 3169},\n", " 'propCount': 167,\n", " 'correct': 46539341,\n", " 'incorrect': 798673,\n", " 'VR': 0.01687170484169446},\n", " 'suggestion': {'instanceOf': {'correct': 61936, 'incorrect': 18751},\n", " 'subclass': {'correct': 0, 'incorrect': 0},\n", " 'instanceOfOrSubclass': {'correct': 24237, 'incorrect': 3458},\n", " 'propCount': 11,\n", " 'correct': 86173,\n", " 'incorrect': 22209,\n", " 'VR': 0.20491410012732741},\n", " 'normal': {'instanceOf': {'correct': 425646789, 'incorrect': 5275469},\n", " 'subclass': {'correct': 98826, 'incorrect': 13611},\n", " 'instanceOfOrSubclass': {'correct': 68370289, 'incorrect': 852276},\n", " 'propCount': 1287,\n", " 'correct': 494115904,\n", " 'incorrect': 6141356,\n", " 'VR': 0.012276395548962147}}" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstViolations" ] }, { "cell_type": "code", "execution_count": 19, "id": "aggregate-impact", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratiototal
P2093148843205927027[../../allConstraintsAnalysisWRemoved2/typeCon...0.006190149770232
P147644059166208472[../../allConstraintsAnalysisWRemoved2/typeCon...0.00470944267638
P57739990807165864[../../allConstraintsAnalysisWRemoved2/typeCon...0.00413040156671
P143337028672112955[../../allConstraintsAnalysisWRemoved2/typeCon...0.00304137141627
P121533425605316565[../../allConstraintsAnalysisWRemoved2/typeCon...0.00938233742170
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2093 148843205 927027 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P1476 44059166 208472 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P577 39990807 165864 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P1433 37028672 112955 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "P1215 33425605 316565 [../../allConstraintsAnalysisWRemoved2/typeCon... \n", "\n", " violation_ratio total \n", "P2093 0.006190 149770232 \n", "P1476 0.004709 44267638 \n", "P577 0.004130 40156671 \n", "P1433 0.003041 37141627 \n", "P1215 0.009382 33742170 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF['total'] = typeConstDF['correct'] + typeConstDF['incorrect']\n", "typeConstDF.sort_values(by=['total'],ascending=False).head()" ] }, { "cell_type": "markdown", "id": "bearing-kruger", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": null, "id": "assumed-toner", "metadata": {}, "outputs": [], "source": [ "# from tqdm.notebook import tqdm\n", "# import os.path\n", "\n", "# cnt = 0\n", "# fCnt = 1\n", "# for prop in tqdm(df1.node1.unique()):\n", "# try:\n", "# if not(os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv\")):\n", "# continue\n", "# relation = df1[(df1['node1'] == prop) & (df1['label'] == 'P2309')].node2.values[0][0]\n", "# type1 = df1[(df1['node1'] == prop) & (df1['label'] == 'P2316')].node2.values\n", "\n", "# parents = df1[(df1['node1'] == prop) & (df1['label'] == 'P2308')].node2.values[0]\n", "# exceptions = df1[(df1['node1'] == prop) & (df1['label'] == 'P2303')].node2.values\n", "\n", "# # print(prop, relation, type1, parents, exceptions)\n", "\n", "# if relation == \"Q21503252\":\n", "# parentFile = \"P31P279star\"\n", "# parentTitle = 'instanceOf'\n", "# elif relation == \"Q21514624\":\n", "# parentFile = \"P279star\"\n", "# parentTitle = 'subclass'\n", "# 
else:\n", "# parentFile = \"isastar\"\n", "# parentTitle = 'instanceOfOrSubclass'\n", "\n", "# if len(type1) != 0 and type1[0][0] == \"Q21502408\":\n", "# typeVal = \"mandatory\"\n", "# elif len(type1) != 0 and type1[0][0] == \"Q62026391\":\n", "# typeVal = \"suggestion\"\n", "# else:\n", "# typeVal = \"normal\"\n", "\n", "# if len(exceptions):\n", "# exceptionPart = \"or node1 in \" + str(exceptions[0]).replace(\"'\",'\"')\n", "# else:\n", "# exceptionPart = \"\"\n", " \n", "# if cnt % 100 == 0:\n", "# fOP = open(\"../../propertiesSplitWRemoved2/checkViolations/TimedTypeConstraintValidator\" + str(fCnt) + \".sh\",\"w\")\n", "# fCnt += 1\n", " \n", "# fOP.write(\"{ time kgtk --debug query -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", "# ../../wikidata-20210215/derived.\" + parentFile + \".tsv.gz \\\n", "# --match 'm: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)' \\\n", "# --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", "# --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", "# -o ../../allConstraintsAnalysisWRemoved2/TimedTypeConstraint/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", "# --graph-cache ~/sqlite3_caches/const2123_\" + str(fCnt) + \".sqlite3.db; } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/TimedTypeConstraint_TimedTypeConstraintValidator\" + str(fCnt) + \".txt ; \\\n", "# kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", "# --filter-on ../../allConstraintsAnalysisWRemoved2/TimedTypeConstraint/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", "# --filter-mode NONE \\\n", "# --input-keys node1 label \\\n", "# --filter-keys node1 label \\\n", "# -o ../../allConstraintsAnalysisWRemoved2/TimedTypeConstraint/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv\\n\")\n", "\n", "# cnt += 1\n", "# 
except:\n", "# print(\"Something failed for prop:\",prop)\n", " " ] }, { "cell_type": "code", "execution_count": 93, "id": "veterinary-fault", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "52944ea021934d23b3d4ab3fb1f091f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/122 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for type constraint checks\")" ] }, { "cell_type": "markdown", "id": "intense-computer", "metadata": {}, "source": [ "## Value Type Constraint" ] }, { "cell_type": "markdown", "id": "animated-companion", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 9, "id": "static-profit", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "dfValueType = pd.read_csv('../../constraintsOP/valuetypeConstraint/claims.type-constraints_all1.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 10, "id": "worthy-malawi", "metadata": {}, "outputs": [], "source": [ "dfValueType = dfValueType.groupby(['node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 11, "id": "eleven-tiffany", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
0P1000P2308[Q1241356]
1P1000P2309[Q30208840]
2P1001P2308[Q20926517, Q2881272, Q2882257, Q3624078, Q389...
3P1001P2309[Q30208840]
4P1002P2308[Q2576663]
\n", "
" ], "text/plain": [ " node1 label node2\n", "0 P1000 P2308 [Q1241356]\n", "1 P1000 P2309 [Q30208840]\n", "2 P1001 P2308 [Q20926517, Q2881272, Q2882257, Q3624078, Q389...\n", "3 P1001 P2309 [Q30208840]\n", "4 P1002 P2308 [Q2576663]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType.head()" ] }, { "cell_type": "code", "execution_count": 12, "id": "expired-stuff", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2308', 'P2309', 'P2303', 'P2316', 'P6607', 'P2304'], dtype=object)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType['label'].unique()" ] }, { "cell_type": "code", "execution_count": 13, "id": "imposed-newsletter", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [node1, label, node2]\n", "Index: []" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType[dfValueType['label'] == '2316']" ] }, { "cell_type": "code", "execution_count": 14, "id": "answering-alabama", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
330P1659P2308[Q18616576]
331P1659P2309[Q21503252]
332P1659P2316[Q21502408]
\n", "
" ], "text/plain": [ " node1 label node2\n", "330 P1659 P2308 [Q18616576]\n", "331 P1659 P2309 [Q21503252]\n", "332 P1659 P2316 [Q21502408]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType[dfValueType['node1'] == 'P1659']" ] }, { "cell_type": "code", "execution_count": 15, "id": "danish-blackberry", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
2031P991P2308[Q5, Q7210356]
2032P991P2309[Q21503252]
\n", "
" ], "text/plain": [ " node1 label node2\n", "2031 P991 P2308 [Q5, Q7210356]\n", "2032 P991 P2309 [Q21503252]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType[dfValueType.node1 == 'P991']" ] }, { "cell_type": "markdown", "id": "digital-harvard", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 16, "id": "white-badge", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "080f8e771b7448de82088862b4330e8b", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/932 [00:00(node2), \" + parentFile + \": (node2)-[]->(nodex), P279star: (nodex)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/const112_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\")\n", " \n", " cnt += 1\n", " except:\n", " print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 17, "id": "qualified-cursor", "metadata": 
{}, "outputs": [ { "data": { "text/plain": [ "904" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 18, "id": "simplified-cameroon", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,9):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved2/checkViolations/valueTypeConstraintValidator\"+str(i)+\".sh\")\n", " " ] }, { "cell_type": "markdown", "id": "spectacular-warner", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 20, "id": "valid-defense", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "38d78b0ecfdc40f596565c00c4b4fbd8", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cf62ec681d004b5c84cbcfa4e5968788", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/216 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P85245264[../../allConstraintsAnalysisWRemoved2/valuety...0.000883
P85316004[../../allConstraintsAnalysisWRemoved2/valuety...0.002494
P23024791826[../../allConstraintsAnalysisWRemoved2/valuety...0.000542
P309275347[../../allConstraintsAnalysisWRemoved2/valuety...0.000928
P30961119310[../../allConstraintsAnalysisWRemoved2/valuety...0.000893
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P852 4526 4 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P853 1600 4 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P2302 47918 26 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P3092 7534 7 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P3096 11193 10 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "\n", " violation_ratio \n", "P852 0.000883 \n", "P853 0.002494 \n", "P2302 0.000542 \n", "P3092 0.000928 \n", "P3096 0.000893 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF.head()" ] }, { "cell_type": "code", "execution_count": 26, "id": "neural-trail", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P50080341961[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P610409808[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P254501378[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P26680179[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P7374044[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P3028015[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P2839015[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P3027013[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P2127012[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P538010[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P224106[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P442506[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P619105[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P653305[../../allConstraintsAnalysisWRemoved2/valuety...1.0
P653405[../../allConstraintsAnalysisWRemoved2/valuety...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5008 0 341961 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P6104 0 9808 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P2545 0 1378 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P2668 0 179 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P7374 0 44 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P3028 0 15 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P2839 0 15 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P3027 0 13 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P2127 0 12 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P538 0 10 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P2241 0 6 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P4425 0 6 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P6191 0 5 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P6533 0 5 [../../allConstraintsAnalysisWRemoved2/valuety... \n", "P6534 0 5 [../../allConstraintsAnalysisWRemoved2/valuety... 
\n", "\n", " violation_ratio \n", "P5008 1.0 \n", "P6104 1.0 \n", "P2545 1.0 \n", "P2668 1.0 \n", "P7374 1.0 \n", "P3028 1.0 \n", "P2839 1.0 \n", "P3027 1.0 \n", "P2127 1.0 \n", "P538 1.0 \n", "P2241 1.0 \n", "P4425 1.0 \n", "P6191 1.0 \n", "P6533 1.0 \n", "P6534 1.0 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF.sort_values(by=['violation_ratio','incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 27, "id": "cutting-polyester", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 904.000000\n", "mean 0.098485\n", "std 0.214803\n", "min 0.000000\n", "25% 0.001492\n", "50% 0.011225\n", "75% 0.063950\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 28, "id": "alert-receiver", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Value Type Constraint Violation Ratios')" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEICAYAAACwDehOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAaZElEQVR4nO3de5gdVZnv8e8PEgjIJUIiA0mgQUBAUYkB4vGZkQFULkKYERSO4TYR1ME5OuoAokdxxmHwGRVkxhuCQ7jIRRgxAh7lJhwdA4YBuXMIGMgFSLgkEK4G3vPHWr0omt3d1emuvbs7v8/z7KerVtWuelft6nr3Wqv23ooIzMzMANbqdABmZjZ8OCmYmVnhpGBmZoWTgpmZFU4KZmZWOCmYmVnhpDDKSApJ23Y6DkskfVTSr9q4v19IOrLGel35XBmzmvs5SdJZq/PcdpK0paSVktbudCwjhZPCMCPp/0j6xxblMyQ9urr/xIOM6a78j7VS0suSXqjMn9SmGDaSdLqkh/N+H8jzExrc5zmSvjaYbUTEBRHx/pr7O0rSb/pY/n1J57Yof4ekFyVtEhH7RsTswcTcYvt7SFpULYuIUyLiY0O5n7yvo/I5tlLS05L+IOmDA3j+Akl7V+J8OCI2iIiXhzrW0cpJYfiZDcyUpB7lhwMXRMSqdgcUEW/N/1gbAP8X+FT3fESc0vT+Ja0DXAu8FdgH2Ah4N/AEsFvT++8jrnYn6NnAX0t6Q4/yw4ErIuLJNsfTlN/lc2088F3gIknjOxrRmiQi/BhGD2A9YAXwF5WyNwIvAO8gXQR/BywHHgH+HVinsm4A2+bpXwMfqyw7CvhNZX4H4GrgSeA+4MM14ivbBO4EDqgsGws8DuwCdOVYjgWW5Fg/X1l3LeBE4AHSxf0SYJNe9vkx4DFggz7i2jHHthy4Cziwsuwc4DvAlcAzwE3Am/MyAacBS4GngTuAt+W4/wS8BKwEfp7XXwCcANwOvAiMqdTjGeBu4K/6OOYBfAK4P8f6nRzDjvk1fjnvb3kv9bwPOKIyv3Y+vjNavD5rAV8CHsr1OxfYOC/rfn3G5PmjgXtyHR4EPp7L3wA8D7yS41oJbAGcDJxfiePAfNyX5xh2rCxbAHw+H7MVwMXAuF7q1/N4rZ/j3DXPvxm4jnTOPA5cAIzPy87LcT6f4zy+RT23AOaQzvn5wDGVfe0GzMvnwWPAtzp9PejEo+MB+NHiRYEfAmdV5j8O3Jan3wVMzxejrvyP/JnKurWSQv5nX5gvBmNIF/LHgZ36ia160TkeuLiybAZwR57u/me8MO9rZ2AZsHde/mlgLjAZWBf4AXBhL/u8CJjdR0xj8z/4ScA6wJ6ki9tb8vJzeLVVMSZfSC7Kyz4A3EJ6V9p9cd688ryv9djXAuA2YAqwXi47JF9s1gI+Ajxb2UY55pXX54q8vy3zMdmn1bq91PWLwDWV+Q/kbYxt8fr8TT4u2wAbAP8JnNfj9em+WO5PuuAKeC/wHDA1L9sDWNQjjpPJSQHYPtf5ffm1OD7vd53KMbs5H6NNSOfsJ3qpXzkGpIR3HCkxvymXbZv3sy4wEbgROL3H67N3Zb5nPW8ktT7GAe/Mx27PvOx3wOF5egNgeqevBZ14uPtoeJoNHCxpXJ4/IpcREbdExNyIWBURC0gX0/euxj4+CCyIiP/I27oVuIx0gavrfGA/SRvl+cNJ79aqvhoRz0bEHcB/AIfl8k8AX4yIRRHxIukic3AvXTKbkloavZlO+ic+NSJeiojrSBfewyrr/DQibo7U/XYB6YIAqTWwIanVpIi4JyL62hfAGRGxMCKeB4iIn0TEkoh4JSIuJrUC+urWOjUilkfEw8D1lVjqOA94r6TJef4I4McR8acW636U9G73wYhYCXwBOLTVMY6IKyPigUhuAH4F/HnNmD4CXBkRV+c4vkFq8f6Pyjpn5GP0JPBz+q7zdEnLSS2nbwAzI2JpjnN+3s+LEbEM+BY1z39JU4D3ACd
ExAsRcRtwFukYQjoXtpU0ISJWRsTcOtsdbZwUhqGI+A3pXftBkt5MusD8GEDS9pKuyIPOTwOnAKsz2LoVsLuk5d0P0kXkzwYQ5xLgt8CHcp/vvqQLbtXCyvRDpHeL3fv/aWXf95C6TjZrsasngM37CGULYGFEvNJjX5Mq849Wpp8jJRFyAvl3UjfOUklnVpJcb6p1QtIRkm6r1OVt9P2atIyljpxIbiSNO20AHETqFmplC9Jx6PYQqaX0umMsaV9JcyU9meuwXz916HU/+XVYSI3j34u5ETGe1G06h0pykrSZpIskLc7n//kDjPPJiHimUlY9T2aRWj33Svr9QAa4RxMnheHrXNI7mJnALyPisVz+PeBeYLuI2IjUZdJzULrbs6Q+2W7VC/5C4IaIGF95bBARnxxgnLNzjIeQBggX91g+pTK9Jan/u3v/+/bY/7gWzwe4BvhAiwHWbkuAKZKq5/OWQKttvU5EnBER7wJ2Il0U/qF7UW9P6Z6QtBWpu+9TwKb5YnYnvb8mfYZSc73ZpFbZh4A/RsQtvay3hJR8u20JrCL1lxeS1iW1Er8BbJbrcBWv1qG/uF6zn3yTxBRqHv/e5NbNJ4HDJe2Si0/J8eycz/+ZvPZY9xXrEmATSRtWysp5EhH3R8RhwJuArwOX9nHOjVpOCsPXucDewDHkrqNsQ9JA2EpJO5D+aXpzG+lulfXzZxdmVZZdAWwv6XBJY/NjV0k7DjDOy4GppDGCVu9Y/3fe/1tJ4xcX5/LvA/+cL6pImihpRi/7OI+URC6TtIOktSRtmu+V3480cPwccHyuxx7AAaSxiD7lOu8uaSwpib5AGqyEdPHcpp9NvIF0IVqWt3c0qaWwOh4DJue7rfpyGeli9lVee270dCHw95K2zq2KU0hjQD3vYFuH1Ee/DFglaV+gehvtY8CmkjbuZT+XAPtL2isfx8+RBuH/q5969Ct3N50FfDkXbUgaRF4haRKvJvBqrC1fs4hYmGP6F0njJL2d9D9xPoCkmZIm5pbO8vy0V1ptazRzUhim8njBf5EuOnMqiz4P/E/SQOoPefUi28pppEG6x0gXj9K1k5vQ7wcOJb2DepT07mjdAcb5POkitTVpILOnG0iDjtcC34iI7g9yfTvX61eSniENOu/eyz5eJCXIe0l3Sz1NGricANwUES+RksC+pG6375Lu0Lm3RhU2Ih3Hp0hdCU8A/5qXnQ3slLuFLu8ltruBb5IGKR8jDaj/tsZ+W7mOdAfPo5Ie722liHiWdMwn8/ruuqofkRLqjcAfSQnv71ps7xngf5Eu7k+Rzq85leX3khLMg/lYbNHj+feR3rH/G+n4H0C6K+2lfupb1+mksau3kxLhVNJdTFfy+nPuX4Av5Tg/32Jbh5EGn5cAPwW+EhHX5GX7AHdJWkk6Pw/tHjdakyjCP7JjgyPpy8D2ETGzUtZFuhCNbfHO1MyGqbZ/OtZGF0mbkJrgh3c6FjMbPHcf2WqTdAypr/8XEXFjp+Mxs8Fz95GZmRVuKZiZWTGixxQmTJgQXV1dnQ7DzGxEueWWWx6PiImtlo3opNDV1cW8efM6HYaZ2Ygi6aHelrn7yMzMCicFMzMrnBTMzKxwUjAzs8JJwczMCicFMzMrnBTMzKxwUjAzs8JJwczMihH9iebB6DrxyjK94NT9OxiJmdnw4ZaCmZkVTgpmZlY4KZiZWeGkYGZmhZOCmZkVTgpmZlY4KZiZWeGkYGZmhZOCmZkVTgpmZlY4KZiZWeGkYGZmhZOCmZkVjScFSWtLulXSFXl+a0k3SZov6WJJ6+TydfP8/Ly8q+nYzMzstdrRUvg0cE9l/uvAaRGxLfAUMCuXzwKeyuWn5fXMzKyNGk0KkiYD+wNn5XkBewKX5lVmAwfl6Rl5nrx8r7y+mZm1SdMthdOB44FX8vymwPKIWJXnFwGT8vQkYCFAXr4ir/8ako6VNE/SvGXLljUYupnZmqexpCDpg8DSiLhlKLcbEWdGxLS
ImDZx4sSh3LSZ2RqvyZ/jfA9woKT9gHHARsC3gfGSxuTWwGRgcV5/MTAFWCRpDLAx8ESD8ZmZWQ+NtRQi4gsRMTkiuoBDgesi4qPA9cDBebUjgZ/l6Tl5nrz8uoiIpuIzM7PX68TnFE4APitpPmnM4OxcfjawaS7/LHBiB2IzM1ujNdl9VETEr4Ff5+kHgd1arPMCcEg74jEzs9b8iWYzMyucFMzMrHBSMDOzwknBzMwKJwUzMyucFMzMrHBSMDOzwknBzMwKJwUzMyucFMzMrHBSMDOzwknBzMwKJwUzMyucFMzMrHBSMDOzwknBzMwKJwUzMyucFMzMrHBSMDOzwknBzMwKJwUzMyucFMzMrHBSMDOzwknBzMwKJwUzMyucFMzMrHBSMDOzwknBzMwKJwUzMyucFMzMrHBSMDOzwknBzMwKJwUzMyucFMzMrHBSMDOzwknBzMwKJwUzMyucFMzMrGgsKUgaJ+lmSX+QdJekr+byrSXdJGm+pIslrZPL183z8/PyrqZiMzOz1ppsKbwI7BkR7wDeCewjaTrwdeC0iNgWeAqYldefBTyVy0/L65mZWRs1lhQiWZlnx+ZHAHsCl+by2cBBeXpGnicv30uSmorPzMxer9ExBUlrS7oNWApcDTwALI+IVXmVRcCkPD0JWAiQl68ANm2xzWMlzZM0b9myZU2Gb2a2xmk0KUTEyxHxTmAysBuwwxBs88yImBYR0yZOnDjYzZmZWUVb7j6KiOXA9cC7gfGSxuRFk4HFeXoxMAUgL98YeKId8ZmZWdLk3UcTJY3P0+sB7wPuISWHg/NqRwI/y9Nz8jx5+XUREU3FZ2Zmrzem/1VW2+bAbElrk5LPJRFxhaS7gYskfQ24FTg7r382cJ6k+cCTwKENxmZmZi00lhQi4nZglxblD5LGF3qWvwAc0lQ8ZmbWP3+i2czMCicFMzMrnBTMzKxwUjAzs8JJwczMilpJQdLOTQdiZmadV7el8N38Ndh/K2njRiMyM7OOqZUUIuLPgY+SvobiFkk/lvS+RiMzM7O2qz2mEBH3A18CTgDeC5wh6V5Jf91UcGZm1l51xxTeLuk00ncX7QkcEBE75unTGozPzMzaqO7XXPwbcBZwUkQ8310YEUskfamRyMzMrO3qJoX9gecj4mUASWsB4yLiuYg4r7HozMysreqOKVwDrFeZXz+XmZnZKFI3KYyr/N4yeXr9ZkIyM7NOqZsUnpU0tXtG0ruA5/tY38zMRqC6YwqfAX4iaQkg4M+AjzQVlJmZdUatpBARv5e0A/CWXHRfRPypubDMzKwTBvLLa7sCXfk5UyUREec2EpWZmXVEraQg6TzgzcBtwMu5OAAnBTOzUaRuS2EasFNERJPBmJlZZ9W9++hO0uCymZmNYnVbChOAuyXdDLzYXRgRBzYSlZmZdUTdpHByk0GYmdnwUPeW1BskbQVsFxHXSFofWLvZ0MzMrN3qfnX2McClwA9y0STg8oZiMjOzDqk70Hwc8B7gaSg/uPOmpoIyM7POqJsUXoyIl7pnJI0hfU7BzMxGkbpJ4QZJJwHr5d9m/gnw8+bCMjOzTqibFE4ElgF3AB8HriL9XrOZmY0ide8+egX4YX6YmdkoVfe7j/5IizGEiNhmyCMyM7OOGch3H3UbBxwCbDL04ZiZWSfVGlOIiCcqj8URcTqwf7OhmZlZu9XtPppamV2L1HIYyG8xmJnZCFD3wv7NyvQqYAHw4SGPxszMOqru3Ud/2XQgZmbWeXW7jz7b1/KI+NbQhGNmZp00kLuPdgXm5PkDgJuB+5sIyszMOqNuUpgMTI2IZwAknQxcGREzmwrMzMzar+7XXGwGvFSZfymX9UrSFEnXS7pb0l2SPp3LN5F0taT789835nJJOkPSfEm397jjyczM2qBuUjgXuFnSybmVcBMwu5/nrAI+FxE7AdOB4yTtRPoepWsjYjvg2jwPsC+wXX4cC3xvIBUxM7PBq/vhtX8Gjgaeyo+jI+K
Ufp7zSET8d55+BriH9OM8M3g1ocwGDsrTM4BzI5kLjJe0+cCqY2Zmg1G3pQCwPvB0RHwbWCRp67pPlNQF7EJqYWwWEY/kRY/yajfUJGBh5WmLclnPbR0raZ6kecuWLRtA+GZm1p+6P8f5FeAE4Au5aCxwfs3nbgBcBnwmIp6uLouIYIA/1hMRZ0bEtIiYNnHixIE81czM+lG3pfBXwIHAswARsQTYsL8nSRpLSggXRMR/5uLHuruF8t+luXwxMKXy9Mm5zMzM2qRuUnip+q5e0hv6e4IkAWcD9/T4cNsc4Mg8fSTws0r5EfkupOnAiko3k5mZtUHdzylcIukHpMHfY4C/of8f3HkPcDhwh6TbctlJwKl5e7OAh3j1O5SuAvYD5gPPkQa2zcysjfpNCvkd/8XADsDTwFuAL0fE1X09LyJ+A6iXxXu1WD+A4/qLx8zMmtNvUoiIkHRVROwM9JkIzMxsZKs7pvDfknZtNBIzM+u4umMKuwMzJS0g3YEkUiPi7U0FZmZm7ddnUpC0ZUQ8DHygTfGYmVkH9ddSuJz07agPSbosIj7UhpjMzKxD+htTqN49tE2TgZiZWef1lxSil2kzMxuF+us+eoekp0kthvXyNLw60LxRo9GZmVlb9ZkUImLtdgViZmadN5CvzjYzs1HOScHMzAonBTMzK5wUzMyscFIwM7PCScHMzAonBTMzK5wUzMyscFIwM7PCScHMzAonBTMzK5wUzMyscFIwM7PCScHMzAonBTMzK5wUzMyscFIwM7PCScHMzAonBTMzK5wUzMyscFIwM7PCScHMzAonBTMzK5wUzMyscFIwM7PCScHMzAonBTMzK5wUzMyscFIwM7OisaQg6UeSlkq6s1K2iaSrJd2f/74xl0vSGZLmS7pd0tSm4jIzs9412VI4B9inR9mJwLURsR1wbZ4H2BfYLj+OBb7XYFxmZtaLxpJCRNwIPNmjeAYwO0/PBg6qlJ8byVxgvKTNm4rNzMxaa/eYwmYR8UiefhTYLE9PAhZW1luUy15H0rGS5kmat2zZsuYiNTNbA43p1I4jIiTFajzvTOBMgGnTpg34+a10nXhlmV5w6v5DsUkzsxGp3S2Fx7q7hfLfpbl8MTClst7kXGZmZm3U7qQwBzgyTx8J/KxSfkS+C2k6sKLSzWRmZm3SWPeRpAuBPYAJkhYBXwFOBS6RNAt4CPhwXv0qYD9gPvAccHRTcZmZWe8aSwoRcVgvi/ZqsW4AxzUVi5mZ1eNPNJuZWeGkYGZmhZOCmZkVTgpmZlY4KZiZWeGkYGZmhZOCmZkVTgpmZlY4KZiZWeGkYGZmhZOCmZkVTgpmZlY4KZiZWeGkYGZmhZOCmZkVTgpmZlY4KZiZWeGkYGZmhZOCmZkVTgpmZlaM6XQAw03XiVeW6QWn7t/BSMzM2s8tBTMzK5wUzMyscFIwM7PCYwp98PiCma1p3FIwM7PCLYWaqq0GcMvBzEYntxTMzKxwUjAzs8JJwczMCicFMzMrPNC8mny7qpm1WzuuO24pmJlZ4aRgZmaFu4+GgLuSzGy0cEvBzMwKtxQa5BaEmY00bimYmVnhlsIQ6/kdSf2VV1sQQ7XOSDWaWlb+riwbqYZVUpC0D/BtYG3grIg4tcMhNa63i/xA1xno+r0lGl+8rJvPizXTsEkKktYGvgO8D1gE/F7SnIi4u7ORjQwDTRx1ttPbhaCvfdV5zkAvML09d6Db9EXOWvF58VrDJikAuwHzI+JBAEkXATMAJ4UGNNFCWZ19DyZBDFUMvamTgIbyGA00pqHa12DqM9DuzKF8QzGYYz+YNxFNJJHh1N2oiOjYzqskHQzsExEfy/OHA7tHxKd6rHcscGyefQtw32rucgLw+Go+d6RyndcMrvOaYTB13ioiJrZaMJxaCrVExJnAmYPdjqR5ETFtCEIaMVznNYPrvGZoqs7D6ZbUxcCUyvzkXGZmZm0ynJLC74HtJG0taR3gUGBOh2MyM1ujDJvuo4h
YJelTwC9Jt6T+KCLuanCXg+6CGoFc5zWD67xmaKTOw2ag2czMOm84dR+ZmVmHOSmYmVkx6pOCpH0k3SdpvqQTWyxfV9LFeflNkro6EOaQqlHnz0q6W9Ltkq6VtFUn4hxK/dW5st6HJIWkEX/7Yp06S/pwfq3vkvTjdsc41Gqc21tKul7Srfn83q8TcQ4VST+StFTSnb0sl6Qz8vG4XdLUQe80IkbtgzRg/QCwDbAO8Adgpx7r/C3w/Tx9KHBxp+NuQ53/Elg/T39yTahzXm9D4EZgLjCt03G34XXeDrgVeGOef1On425Dnc8EPpmndwIWdDruQdb5L4CpwJ29LN8P+AUgYDpw02D3OdpbCuWrMyLiJaD7qzOqZgCz8/SlwF6S1MYYh1q/dY6I6yPiuTw7l/SZkJGszusM8E/A14EX2hlcQ+rU+RjgOxHxFEBELG1zjEOtTp0D2ChPbwwsaWN8Qy4ibgSe7GOVGcC5kcwFxkvafDD7HO1JYRKwsDK/KJe1XCciVgErgE3bEl0z6tS5ahbpncZI1m+dc7N6SkQ084VO7Vfndd4e2F7SbyXNzd9CPJLVqfPJwExJi4CrgL9rT2gdM9D/934Nm88pWPtJmglMA97b6ViaJGkt4FvAUR0Opd3GkLqQ9iC1Bm+UtHNELO9kUA07DDgnIr4p6d3AeZLeFhGvdDqwkWK0txTqfHVGWUfSGFKT84m2RNeMWl8XImlv4IvAgRHxYptia0p/dd4QeBvwa0kLSH2vc0b4YHOd13kRMCci/hQRfwT+HylJjFR16jwLuAQgIn4HjCN9cdxoNeRfDzTak0Kdr86YAxyZpw8Gros8gjNC9VtnSbsAPyAlhJHezwz91DkiVkTEhIjoiogu0jjKgRExrzPhDok65/blpFYCkiaQupMebGOMQ61OnR8G9gKQtCMpKSxra5TtNQc4It+FNB1YERGPDGaDo7r7KHr56gxJ/wjMi4g5wNmkJuZ80oDOoZ2LePBq1vlfgQ2An+Qx9Ycj4sCOBT1INes8qtSs8y+B90u6G3gZ+IeIGLGt4Jp1/hzwQ0l/Txp0Pmokv8mTdCEpsU/I4yRfAcYCRMT3SeMm+wHzgeeAowe9zxF8vMzMbIiN9u4jMzMbACcFMzMrnBTMzKxwUjAzs8JJwczMCicFMzMrnBTMzKz4/y3XvwAqR1ZgAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "valTypeConstDF['violation_ratio'].plot.hist(bins=100).set_title(\"Value Type Constraint Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 29, "id": "italian-motel", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Value Type Constraint Violation Ratios (<=0.04)')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEICAYAAACwDehOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAgv0lEQVR4nO3deZwdVZ338c8Xwr4YIA0DCTEB2RUUIzCPMiKobEJwB2Vz0Iiio6MMm46gjzAw44gwuEVAAiIBQRZFZlhUeBwFTCAsYZEAgSxAGjBAWIKB3/PHOV0UnXu7qzt9b92kv+/X67666lTVqV9V36pfnTp171VEYGZmBrBS3QGYmVnncFIwM7OCk4KZmRWcFMzMrOCkYGZmBScFMzMrOCm0kKSQ9Ka647BE0iclXdvG9V0j6bAK843L75URg1zPCZLOHsyy7SRprKRFklYe4npXk3SPpI2Hst52k7SfpIvrjsNJoQ+S/lvStxqUT5T0+GAP4mWMaWY+sBZJekXSS6XxE9oUw7qSvifp0bzeB/P4qBau8zxJ316WOiLiwoh4f8X1HS7pD31M/5Gk8xuU7yBpsaT1I2LviJiyLDE3qH83SXPLZRFxSkR8eijXk9d1eH6PLZL0rKQ7JH1gAMvPlvTeUpyPRsTaEfHKEIc6CbgpIh4b4noBUHKapKfy6zRJ6mP+T0h6RNLzkq6QtH6DebbIx+7Pesoi4lfAdpK2b8V2VOWk0LcpwMEN3gCHABdGxJJ2BxQR2+UDa23g/wFf6BmPiFNavX5JqwI3ANsBewHrAn8PPAXs1Or19xFXuxP0FOBDktbqVX4I8OuIeLrN8bTKn/J7bSTwA2CqpJG1RrS0I4ELqswoaaNB1D8JOADYAdge2A/4bJP6twN+THofbAS8QNpvvX0f+HOD8ovy+uoTEX41eQFrAM8A/1AqWw94ifQG2Qn4E7AQeAw4C1i1NG8Ab8rDvwc+XZp2OPCH0vjWwHXA08D9wMcqxFfUCdwN7FeatgrwJPA2YFyOZRIwP8d6dGnelYDjgAdJJ/dLgPWbrPPTwBPA2n3EtU2ObSEwE9i/NO080gFxNfAccAuweZ4m4HRgAfAscBfw5hz334CXgUXAr/L8s4FjgTuBxcCI0nY8B9wDfLCPfR6kE8oDOdbv5xi2yf/jV/L6FjbZzvuBQ0vjK+f9O7HB/2cl4OvAI3n7zgfekKf1/H9G5PFPAffmbXgI+GwuXwt4EXg1x7UI2AQ4CfhZKY79835fmGPYpjRtNnB03mfPABcDqzfZvt77a80c5zvy+ObAb0nvmSeBC4GRedoFOc4Xc5zHNNjOTYCrSO/5WcBnSuvaCZiW3wdPAN9tEuPYvI4RfbwfVwE+mNf13CDOA38EJpXGjwBubjLvKcDPS+Obk96365TKDiQdY6/7v+Vp7wQeHsrz2IC3t86VLw8v4CfA2aXxzwIz8vDbgV1IJ6Nx+UD+cmneSkkhH+xz8slgBOl
E/iSwbT+xFXXmg+7i0rSJwF15uOdgvCiv6y1AN/DePP1LwM3AGGA10pXORU3WORWY0kdMq+QD/ARgVWB30sltqzz9PF5rVYwgnUim5ml7AtNJV6U9J+eNS8t9u9e6ZgMzgE2BNXLZR0knm5WAjwPPl+oo9nnp//PrvL6xeZ/s1WjeJtv6NeD60vieuY5VGvx//jHvl82AtYFfAhf0+v/0nCz3JZ1MBLybdLW5Y562GzC3VxwnkU8uwJZ5m9+X/xfH5PWuWtpnt+Z9tD7pPXtkk+0r9gEp4R1FOsFtmMvelNezGtAF3AR8r9f/572l8d7beRPpKnp14K153+2ep/0JOCQPrw3s0iTGfYGZTaa9BfguKQn/iXTsjixNP46UOBu+SvM9A+xcGp9Ak+QCXAkc26tsEfD2PLwu8BfSsVb830rzrp/30bqtPrc1e/n2Uf+mAB+RtHoePzSXERHTI+LmiFgSEbNJJ9N3D2IdHwBmR8RPc123A5eRTnBV/QzYR9K6efwQlm5SfzMino+Iu4CfAgfl8iOBr0XE3IhYTHqzfqTJLZkNSC2NZnYhHcSnRsTLEfFb0on3oNI8l0fErZFuv11IOiFAag2sQ2o1KSLujf7vE58ZEXMi4kWAiPhFRMyPiFcj4mJSK6Cv21qnRsTCiHgU+F0pliouAN4taUweP5R0lfi3BvN+knS1+1BELAKOBw5stI8j4uqIeDCSG4FrgV0rxvRx4OqIuC7H8R1Si/f/lOY5M++jp4Ff0fc27yJpIanl9B3g4IhYkOOcldezOCK6SSfgSu9/SZuSroqPjYiXImIGcDZpH0J6L7xJ0qiIWBQRNzepaiTpoqNc9+6SpgG/yXG/KyL+PiJ+HBELe+aLiFMjYmSzV6nKtUmJocczwNpN+hV6z9sz/zp5+P8C50TEXBrr2ZaRTaa3nJNCPyLiD6Sr9gMkbU46wfwcQNKWkn6dO52fJTUdB9PZ+kZgZ0kLe16kk8jfDSDO+cD/Ah/O93z3Jp1wy+aUhh8hXS32rP/y0rrvJd06aXT/9Smgr6c8NgHmRMSrvdY1ujT+eGn4BdKBRE4gZ5Fu4yyQNLmU5JopbxOSDpU0o7Qtb6bv/0nDWKrIieQmUr/T2qT7zkt1PmebkPZDj0dILaWl9rGkvSXdLOnpvA379LMNTdeT/w9zqLD/m7g5nyDXI91+KZKTpI0kTZU0L7//fzbAOJ+OiPIJvfw+OYLU6rlP0p/76OD+K6+dcHtsSGrF3A3cATxaMaZmFpGu8HusCyyKfGnfz7w98z8n6a3Ae0m3SJvp2ZaFg4p0CDgpVHM+6QrmYOB/IuKJXP5D4D5gi4hYl3TLpNlTCc+T7sn2KJ/w5wA39rpSWTsiPjfAOKfkGD9K6iCc12v6pqXhsaT73z3r37vX+ldvsDzA9cCeDTpYe8wHNpVUfm+NBRrVtZSIODMi3g5sSzop/EvPpGaL9AxIeiPpdt8XgA3yyexumv9P+gyl4nxTSK2yD5PuBU9vMt98UvLtMRZYQrpfXpC0GqmV+B1go7wNv+G1begvrtetJ1/NbkrF/d9Mbt18DjhE0tty8Sk5nrfk9//BvH5f9xXrfGB9SeUTevE+iYgHIuIg0gn+NODSJu+5O4Hx5RZXREwlHV8XkJLLfEk/kfSu8oJKj/IuavYqzTqT1IfYY4dc1sjr5pW0Gen22l9It/7GAY9KepzUt/NhSbeVlt+GdNfg2Sb1t5yTQjXnkzL8Z8i3jrJ1SB1hiyRtTTpomplBelplTaXPLhxRmvZrYEtJh0haJb/eIWmbAcZ5BbAjqY+g0RXrv+b1b0fqv+h5JvpHwMn5pIqkLkkTm6zjAlISuUzS1pJWkrRBPsD2IXUcvwAck7djN9LTGlP7Cz5v886SViEl0ZdInZWQTp6b9VPFWqQTUXeu71OklsJgPAGMyU9b9eUy0snsm7z+vdHbRcA/SxqfWxWnkPqAej/BtirpJNINLJG0N1B+jPYJYANJb2i
ynkuAfSXtkffjV0md8H/sZzv6lW83nQ18IxetQ7oyfkbSaF5L4OVYG/7PImJOjunfJK2eH8M8gtTaQNLBkrpyS2dhXuzVBvXMJfWZ7NSr/KWI+HmkR5B3IPVv/FTSg6V5TonXntxb6lWq7nzgK5JGS9qEtE/Pa7KbLgT2k7RrTmLfAn6ZW0STSX1Fb82vH5EeuNiztPy7gWua1N0WTgoV5P6CP5JOOleVJh0NfIJ0H/AnvHaSbeR0UifdE6STR3FrJ79h3k96KmE+qXl/GunkMJA4XySdpMaTOjJ7u5F0AN0AfCciej7IdUbermslPUfqdN65yToWkxLkfaSnpZ4ldVyOAm6JiJdJSWBv0m23H5Ce0LmvwiasS9qPfyXdSngK+I887Rxg23xb6Iomsd0D/CepU/EJUkfj/1ZYbyO/JV31PS7pyWYzRcTzpH0+hqVv15WdS0qoNwEPkxLeFxvU9xzwT6ST+19J76+rStPvIyWYh/K+2KTX8veTrtj/i7T/9yM9lfZyP9tb1fdIfVfbkxLhjqR75lez9Hvu34Cv5ziPblDXQaQr5/nA5cCJEXF9nrYXMDNfsZ8BHNjTb9RAzyOgDeU+p5MjYgvgsP43sWH9vyI9DXc3aVt/3DMxtyx2zeuaSeqju5DUwb0O8Pk87YWIeLznRUqoL+X+mB4HleuugxrfFrPllaRvAFtGxMGlsnGkE9EqDa5MzZZr+Zbb7cAeFR5M6FiS9iM9cfWxWuNwUlhxKH1y8nbSG+umUvk4nBTMrALfPlpBSPoM6V7/NeWEYGY2EG4pmJlZwS0FMzMrtP1bPofSqFGjYty4cXWHYWa2XJk+ffqTEdHVaNpynRTGjRvHtGnT6g7DzGy5IumRZtN8+8jMzApOCmZmVnBSMDOzgpOCmZkVnBTMzKzgpGBmZgUnBTMzKzgpmJlZwUnBzMwKy/UnmpfFuOOuLoZnn7pvjZGYmXUOtxTMzKzgpGBmZoWWJQVJ50paIOnuXuVflHSfpJmS/r1UfrykWZLul7Tn0jWamVmrtbJP4TzgLOD8ngJJ7wEmAjtExGJJG+bybUk/Wr8dsAlwvaQtI+KVFsZnZma9tKylkH8S8ulexZ8DTo2IxXmeBbl8IjA1IhZHxMPALGCnVsVmZmaNtbtPYUtgV0m3SLpR0jty+WjS7wv3mJvLliJpkqRpkqZ1d3e3OFwzs+Gl3UlhBLA+sAvwL8AlkjSQCiJickRMiIgJXV0NfzjIzMwGqd1JYS7wy0huBV4FRgHzgE1L843JZWZm1kbtTgpXAO8BkLQlsCrwJHAVcKCk1SSNB7YAbm1zbGZmw17Lnj6SdBGwGzBK0lzgROBc4Nz8mOrLwGEREcBMSZcA9wBLgKP85JGZWfu1LClExEFNJh3cZP6TgZNbFY+ZmfXPn2g2M7OCk4KZmRWcFMzMrOCkYGZmBScFMzMrOCmYmVnBScHMzApOCmZmVnBSMDOzgpOCmZkVnBTMzKzgpGBmZgUnBTMzKzgpmJlZwUnBzMwKTgpmZlZoWVKQdK6kBflX1npP+6qkkDQqj0vSmZJmSbpT0o6tisvMzJprZUvhPGCv3oWSNgXeDzxaKt6b9LvMWwCTgB+2MC4zM2uiZUkhIm4Cnm4w6XTgGCBKZROB8yO5GRgpaeNWxWZmZo21tU9B0kRgXkTc0WvSaGBOaXxuLmtUxyRJ0yRN6+7ublGkZmbDU9uSgqQ1gROAbyxLPRExOSImRMSErq6uoQnOzMwAGNHGdW0OjAfukAQwBrhN0k7APGDT0rxjcpmZmbVR21oKEXFXRGwYEeMiYhzpFtGOEfE4cBVwaH4KaRfgmYh4rF2xmZlZ0spHUi8C/gRsJWmupCP6mP03wEPALOAnwOdbFZeZmTXXsttHEXFQP9PHlYYDOKpVsZiZWTX+RLOZmRWcFMzMrOCkYGZmBScFMzMrOCmYmVnBScHMzApOCmZmVnBSMDOzgpOCmZkVnBTMzKzgpGBmZgUnBTM
zKzgpmJlZwUnBzMwKTgpmZlZwUjAzs0Irf3ntXEkLJN1dKvsPSfdJulPS5ZJGlqYdL2mWpPsl7dmquMzMrLlWthTOA/bqVXYd8OaI2B74C3A8gKRtgQOB7fIyP5C0cgtjMzOzBlqWFCLiJuDpXmXXRsSSPHozMCYPTwSmRsTiiHiY9FvNO7UqNjMza6zOPoV/BK7Jw6OBOaVpc3PZUiRNkjRN0rTu7u4Wh2hmNrzUkhQkfQ1YAlw40GUjYnJETIiICV1dXUMfnJnZMDai3SuUdDjwAWCPiIhcPA/YtDTbmFxmZmZt1NaWgqS9gGOA/SPihdKkq4ADJa0maTywBXBrO2MzM7MWthQkXQTsBoySNBc4kfS00WrAdZIAbo6IIyNipqRLgHtIt5WOiohXWhWbmZk11rKkEBEHNSg+p4/5TwZOblU8ZmbWP3+i2czMCk4KZmZWcFIwM7OCk4KZmRWcFMzMrOCkYGZmBScFMzMrOCmYmVnBScHMzApOCmZmVnBSMDOzgpOCmZkVnBTMzKxQKSlIekurAzEzs/pVbSn8QNKtkj4v6Q0tjcjMzGpTKSlExK7AJ0k/mTld0s8lva+lkZmZWdtV7lOIiAeArwPHAu8GzpR0n6QPNZpf0rmSFki6u1S2vqTrJD2Q/66XyyXpTEmzJN0pacdl2ywzMxuMqn0K20s6HbgX2B3YLyK2ycOnN1nsPGCvXmXHATdExBbADXkcYG/S7zJvAUwCfjiAbTAzsyFStaXwX8BtwA4RcVRE3AYQEfNJrYelRMRNwNO9iicCU/LwFOCAUvn5kdwMjJS0ceWtMDOzIVH1N5r3BV6MiFcAJK0ErB4RL0TEBQNY30YR8VgefhzYKA+PBuaU5pubyx6jF0mTSK0Jxo4dO4BVm5lZf6q2FK4H1iiNr5nLBi0iAohBLDc5IiZExISurq5lCcHMzHqpmhRWj4hFPSN5eM1BrO+JnttC+e+CXD6P9GRTjzG5zMzM2qhqUni+/ESQpLcDLw5ifVcBh+Xhw4ArS+WH5qeQdgGeKd1mMjOzNqnap/Bl4BeS5gMC/g74eF8LSLoI2A0YJWkucCJwKnCJpCOAR4CP5dl/A+wDzAJeAD41oK0wM7MhUSkpRMSfJW0NbJWL7o+Iv/WzzEFNJu3RYN4AjqoSi5mZtU7VlgLAO4BxeZkdJRER57ckKjMzq0WlpCDpAmBzYAbwSi4OwEnBzGwFUrWlMAHYNt/mMTOzFVTVp4/uJnUum5nZCqxqS2EUcI+kW4HFPYURsX9LojIzs1pUTQontTIIMzPrDFUfSb1R0huBLSLieklrAiu3NjQzM2u3ql+d/RngUuDHuWg0cEWLYjIzs5pU7Wg+Cngn8CwUP7izYauCMjOzelRNCosj4uWeEUkjGMQ3nJqZWWermhRulHQCsEb+beZfAL9qXVhmZlaHqknhOKAbuAv4LOkL7Br+4pqZmS2/qj599Crwk/wyM7MVVNXvPnqYBn0IEbHZkEdkZma1Gch3H/VYHfgosP7Qh2NmZnWq1KcQEU+VXvMi4nvAvq0NzczM2q3q7aMdS6MrkVoOA/ktht71/TPwadItqbtIv7S2MTAV2ACYDhxSfgzWzMxar+qJ/T9Lw0uA2bz2U5oDImk08E+kr+J+UdIlwIGkn+M8PSKmSvoRcATww8Gsw8zMBqfq00fvacF615D0N2BN4DFgd+ATefoU0pfwOSmYmbVR1dtHX+lrekR8t+oKI2KepO8AjwIvAteSbhctjIgleba5pO9XahTLJGASwNixY6uu1szMKqj64bUJwOdIJ+rRwJHAjsA6+VWZpPWAicB4YBNgLWCvqstHxOSImBARE7q6ugayajMz60fVPoUxwI4R8RyApJOAqyPi4EGs873AwxHRnev6JenL9kZKGpFbC2OAeYOo28zMlkHVlsJGQPlJoJdz2WA8CuwiaU1JAvYA7gF+B3wkz3MYcOUg6zczs0Gq2lI4H7hV0uV5/ABSZ/CARcQ
tki4FbiM9yXQ7MBm4Gpgq6du57JzB1G9mZoNX9emjkyVdA+yaiz4VEbcPdqURcSJwYq/ih4CdBlunmZktu6q3jyA9OvpsRJwBzJU0vkUxmZlZTar+HOeJwLHA8bloFeBnrQrKzMzqUbWl8EFgf+B5gIiYzwAfRTUzs85XNSm8HBFB/vpsSWu1LiQzM6tL1aRwiaQfkz5L8BngevyDO2ZmK5x+nz7KnyW4GNgaeBbYCvhGRFzX4tjMzKzN+k0KERGSfhMRbwGcCMzMVmBVbx/dJukdLY3EzMxqV/UTzTsDB0uaTXoCSaRGxPatCszMzNqvz6QgaWxEPArs2aZ4zMysRv21FK4gfTvqI5Iui4gPtyEmMzOrSX99CioNb9bKQMzMrH79JYVoMmxmZiug/m4f7SDpWVKLYY08DK91NK/b0ujMzKyt+kwKEbFyuwIxM7P6DeSrs83MbAVXS1KQNFLSpZLuk3SvpL+XtL6k6yQ9kP+uV0dsZmbDWV0thTOA/46IrYEdgHuB44AbImIL4IY8bmZmbdT2pCDpDcA/kH+DOSJejoiFwERe+93nKaTfgTYzszaqo6UwHugGfirpdkln599n2CgiHsvzPA5s1GhhSZMkTZM0rbu7u00hm5kND3UkhRHAjsAPI+JtpO9Set2tovIP+vQWEZMjYkJETOjq6mp5sGZmw0kdSWEuMDcibsnjl5KSxBOSNgbIfxfUEJuZ2bDW9qQQEY8DcyRtlYv2AO4BrgIOy2WHAVe2OzYzs+Gu6ldnD7UvAhdKWhV4CPgUKUFdIukI4BHgYzXFZmY2bNWSFCJiBjChwaQ92hyKmZmV+BPNZmZWcFIwM7OCk4KZmRWcFMzMrOCkYGZmBScFMzMrOCmYmVnBScHMzApOCmZmVnBSMDOzgpOCmZkVnBTMzKzgpGBmZgUnBTMzKzgpmJlZwUnBzMwKtSUFSStLul3Sr/P4eEm3SJol6eL8q2xmZtZGdbYUvgTcWxo/DTg9It4E/BU4opaozMyGsVqSgqQxwL7A2XlcwO7ApXmWKcABdcRmZjac1dVS+B5wDPBqHt8AWBgRS/L4XGB0DXGZmQ1rbU8Kkj4ALIiI6YNcfpKkaZKmdXd3D3F0ZmbDWx0thXcC+0uaDUwl3TY6AxgpaUSeZwwwr9HCETE5IiZExISurq52xGtmNmy0PSlExPERMSYixgEHAr+NiE8CvwM+kmc7DLiy3bGZmQ13nfQ5hWOBr0iaRepjOKfmeMzMhp0R/c/SOhHxe+D3efghYKc64zEzG+46qaVgZmY1c1IwM7OCk4KZmRWcFMzMrOCkYGZmBScFMzMrOCmYmVnBScHMzApOCmZmVnBSMDOzgpOCmZkVav3uo0417riri+HZp+5bYyRmZu3lloKZmRWcFMzMrOCkYGZmBfcp8Po+BDOz4cwtBTMzK7Q9KUjaVNLvJN0jaaakL+Xy9SVdJ+mB/He9dsdmZjbc1XH7aAnw1Yi4TdI6wHRJ1wGHAzdExKmSjgOOI/1uc8fzI6xmtqJoe0shIh6LiNvy8HPAvcBoYCIwJc82BTig3bGZmQ13tfYpSBoHvA24BdgoIh7Lkx4HNmqyzCRJ0yRN6+7ubk+gZmbDRG1PH0laG7gM+HJEPCupmBYRISkaLRcRk4HJABMmTGg4Tzv4iSUzWxHV0lKQtAopIVwYEb/MxU9I2jhP3xhYUEdsZmbDWR1PHwk4B7g3Ir5bmnQVcFgePgy4st2xmZkNd3XcPnoncAhwl6QZuewE4FTgEklHAI8AH6shNjOzYa3tSSEi/gCoyeQ92hmLmZm9nr/moh/+DIKZDSdOCi3khGJmyxsnhQFYlsdQnSDMbHngL8QzM7OCWwodyi0LM6uDk0IHGapPSTuhmNlgOSkMsXZ+/cVAT/5OFmbWH/cpmJlZwS2F5UAnXuF3YkxmtuycFFZwnfJtrk4iZssHJwVrqndCGWi/hZktf5wUajbQk2irT7o+qZsNb+5oNjOzgls
KNeiEq/HBxNCKuOvqa6iyXveD2HDkpLCC6IREs6zq+txFu/fd8pJslpc422FZLiIG0zdXJycFa7sqJ+Fm81Q5IJvN3+4vNBzodg5Vi6XKyakVSbTTT3ZWTcclBUl7AWcAKwNnR8SpNYdk/ajrU9ytmH9Z6xyqE2OnfTK+HfEM5dX4QOqpMn8zQ3kbtlOSakd1NEtaGfg+sDewLXCQpG3rjcrMbPjotJbCTsCsiHgIQNJUYCJwT61RmVXUya2moVq2ap3Lcuuu3S28oZ5nKLW7ZaGIaEnFgyHpI8BeEfHpPH4IsHNEfKE0zyRgUh7dCrh/kKsbBTy5DOG2SqfGBZ0bm+MaGMc1MCtiXG+MiK5GEzqtpdCviJgMTF7WeiRNi4gJQxDSkOrUuKBzY3NcA+O4Bma4xdVRfQrAPGDT0viYXGZmZm3QaUnhz8AWksZLWhU4ELiq5pjMzIaNjrp9FBFLJH0B+B/SI6nnRsTMFq1umW9BtUinxgWdG5vjGhjHNTDDKq6O6mg2M7N6ddrtIzMzq5GTgpmZFVaYpCBpL0n3S5ol6bgG01eTdHGefoukcaVpx+fy+yXtWbXOGuOaLekuSTMkTWtnXJI2kPQ7SYskndVrmbfnuGZJOlOSOiSu3+c6Z+TXhm2M632Spuf9Ml3S7qVl6txffcVV5/7aqbTeOyR9sGqdNcZV2/FYmj42v/ePrlpnUxGx3L9IndIPApsBqwJ3ANv2mufzwI/y8IHAxXl42zz/asD4XM/KVeqsI648bTYwqqb9tRbwLuBI4Kxey9wK7AIIuAbYu0Pi+j0woab99TZgkzz8ZmBeh+yvvuKqc3+tCYzIwxsDC0gPxNR9PDaMq+7jsTT9UuAXwNFV62z2WlFaCsXXY0TEy0DP12OUTQSm5OFLgT3yldlEYGpELI6Ih4FZub4qddYR11AYdFwR8XxE/AF4qTyzpI2BdSPi5kjvyvOBA+qOa4gsS1y3R8T8XD4TWCNf9dW9vxrGNcD1tyKuFyJiSS5fHeh5EqbW47GPuIbCspwnkHQA8DDp/ziQOhtaUZLCaGBOaXxuLms4T/7nPgNs0MeyVeqsIy5Ib8hrc7N/EgO3LHH1VefcfuqsI64eP83N+38dxG2aoYrrw8BtEbGYztpf5bh61La/JO0saSZwF3Bknl738dgsLqjxeJS0NnAs8M1B1NlQR31OwSp7V0TMy/d6r5N0X0TcVHdQHeyTeX+tA1wGHEK6Mm8bSdsBpwHvb+d6+9Mkrlr3V0TcAmwnaRtgiqRr2rXuvjSKKyJeot7j8STg9IhYNIguqYZWlJZCla/HKOaRNAJ4A/BUH8sOxVdutCIuIqLn7wLgcgZ+W2lZ4uqrzjH91FlHXOX99Rzwc9q8vySNIf2fDo2IB0vz17q/msRV+/4qxXEvsIjc51Ghzjriqvt43Bn4d0mzgS8DJyh9AHjw+2uwnSOd9CK1eB4idcj2dKps12ueo3h9R80leXg7Xt+h+xCpk6bfOmuKay1gnTzPWsAfSd8s25a4StMPp/+O5n3qjivXOSoPr0K6H3tkG/+PI/P8H2pQb237q1lcHbC/xvNaB+4bgfmkbwOt+3hsFldHHI+5/CRe62ge9P6qHHinv4B9gL+Qety/lsu+Beyfh1cn9c7PygfjZqVlv5aXu5/SEyCN6qw7LtLTBHfk18ya4poNPE26WppLfqoBmADcnes8i/yJ+TrjygfqdODOvL/OID/F1Y64gK8DzwMzSq8N695fzeLqgP11SF7vDOA24IBOOB6bxUUHHI+lOk4iJ4Vl2V/+mgszMyusKH0KZmY2BJwUzMys4KRgZmYFJwUzMys4KZiZWcFJwczMCk4KZmZW+P+EafBiAmuCpgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "valTypeConstDF[valTypeConstDF['violation_ratio'] <= 0.04].violation_ratio.plot.hist(bins=100).set_title(\"Value Type Constraint Violation Ratios (<=0.04)\")" ] }, { "cell_type": "code", "execution_count": 30, "id": "prescription-ceramic", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of constraints whose violation ratio is greater than mean :0/904\n" ] } ], "source": [ "print(f\"No. of constraints whose violation ratio is greater than mean :{sum(valTypeConstDF['violation_ratio'] >= 3.950680)}/{len(valTypeConstDF)}\")" ] }, { "cell_type": "code", "execution_count": 31, "id": "quiet-gardening", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# valTypeConstDF.sort_values(by=['violation_ratio'],ascending=False).head().paths.values" ] }, { "cell_type": "code", "execution_count": 32, "id": "documentary-pipeline", "metadata": {}, "outputs": [], "source": [ "# !head ../../allConstraintsAnalysisWRemoved2/typeConstraint/normal/claims.type-constraints.instanceOf.P7535.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 33, "id": "tutorial-mineral", "metadata": {}, "outputs": [], "source": [ "for key1 in valueTypeConstViolations.keys():\n", " valueTypeConstViolations[key1]['correct'] = valueTypeConstViolations[key1]['instanceOf']['correct'] + valueTypeConstViolations[key1]['subclass']['correct'] + valueTypeConstViolations[key1]['instanceOfOrSubclass']['correct']\n", " valueTypeConstViolations[key1]['incorrect'] = valueTypeConstViolations[key1]['instanceOf']['incorrect'] + valueTypeConstViolations[key1]['subclass']['incorrect'] + valueTypeConstViolations[key1]['instanceOfOrSubclass']['incorrect']\n", " valueTypeConstViolations[key1]['VR'] = valueTypeConstViolations[key1]['incorrect'] / (valueTypeConstViolations[key1]['correct'] + valueTypeConstViolations[key1]['incorrect'])\n", " " ] }, { "cell_type": "code", 
"execution_count": 34, "id": "satellite-concern", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "{'mandatory': {'instanceOf': {'correct': 11564885, 'incorrect': 8245},\n", " 'subclass': {'correct': 55983, 'incorrect': 28},\n", " 'instanceOfOrSubclass': {'correct': 13090, 'incorrect': 137},\n", " 'propCount': 108,\n", " 'correct': 11633958,\n", " 'incorrect': 8410,\n", " 'VR': 0.0007223616363956198},\n", " 'suggestion': {'instanceOf': {'correct': 46189, 'incorrect': 659},\n", " 'subclass': {'correct': 127, 'incorrect': 20},\n", " 'instanceOfOrSubclass': {'correct': 0, 'incorrect': 0},\n", " 'propCount': 5,\n", " 'correct': 46316,\n", " 'incorrect': 679,\n", " 'VR': 0.01444834556867752},\n", " 'normal': {'instanceOf': {'correct': 94112173, 'incorrect': 842434},\n", " 'subclass': {'correct': 4674914, 'incorrect': 9777},\n", " 'instanceOfOrSubclass': {'correct': 77686561, 'incorrect': 289299},\n", " 'propCount': 791,\n", " 'correct': 176473648,\n", " 'incorrect': 1141510,\n", " 'VR': 0.006426872643381034}}" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valueTypeConstViolations" ] }, { "cell_type": "markdown", "id": "traditional-shakespeare", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 78, "id": "spoken-symphony", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "878ab763f4fa4cb9a540c8bf86ea76ec", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/297 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for value type constraint checks\")" ] }, { "cell_type": "markdown", "id": "motivated-sympathy", "metadata": {}, "source": [ "## Item Requires Statement Constraint" ] }, { "cell_type": "markdown", "id": 
"chubby-glass", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 35, "id": "funny-batch", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/itemRequiresConstraint/claims.type-constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 36, "id": "original-expression", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 37, "id": "adequate-symphony", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2306', 'P2305', 'P2316', 'P2304', 'P2303', 'P6607', 'P4155',\n", " 'P31', 'P2916', 'P4680', 'P2308'], dtype=object)" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 38, "id": "infrared-canal", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2306 7182\n", "P2305 2540\n", "P2316 2523\n", "P2303 422\n", "P6607 14\n", "P2304 14\n", "P2916 5\n", "P4680 2\n", "P4155 1\n", "P2308 1\n", "P31 1\n", "Name: label, dtype: int64" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 39, "id": "focused-karen", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 40, "id": "private-boundary", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1id
P1006P1006-P2302-Q21503247-0451ef47-0NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
P1010P1010-P2302-Q21503247-56183614-0NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1010-P2302-Q21503247-fd256eaf-0NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1015P1015-P2302-Q21503247-20e3bfc5-0NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1017P1017-P2302-Q21503247-bbac2ce3-0NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 \\\n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN NaN NaN [P214] NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN NaN NaN [P31] NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 NaN NaN [Q794] [P17] NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN NaN NaN [P31] NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN NaN NaN [P214] NaN \n", "\n", "label P2316 P2916 P31 P4155 P4680 \\\n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN NaN NaN NaN NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN NaN NaN NaN NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 [Q21502408] NaN NaN NaN NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN NaN NaN NaN NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN NaN NaN NaN NaN \n", "\n", "label P6607 \n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 41, "id": "conceptual-schedule", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 42, "id": "third-hayes", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P1006NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
P1010NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1010NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1015NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1017NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 P2316 P2916 P31 P4155 P4680 \\\n", "node1 \n", "P1006 NaN NaN NaN [P214] NaN NaN NaN NaN NaN NaN \n", "P1010 NaN NaN NaN [P31] NaN NaN NaN NaN NaN NaN \n", "P1010 NaN NaN [Q794] [P17] NaN [Q21502408] NaN NaN NaN NaN \n", "P1015 NaN NaN NaN [P31] NaN NaN NaN NaN NaN NaN \n", "P1017 NaN NaN NaN [P214] NaN NaN NaN NaN NaN NaN \n", "\n", "label P6607 \n", "node1 \n", "P1006 NaN \n", "P1010 NaN \n", "P1010 NaN \n", "P1015 NaN \n", "P1017 NaN " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "shaped-companion", "metadata": {}, "source": [ "However, there is one anomaly where the property does not have a co-dependency constraint associated with it, but still has a link to this constraint." ] }, { "cell_type": "code", "execution_count": 43, "id": "indian-journal", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P5447NaNNaN[Q55426051][P5446]NaNNaNNaNNaNNaN[Q46466783]NaN
P5448NaNNaN[Q55426051][P5446]NaNNaNNaNNaNNaN[Q46466783]NaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 P2316 P2916 P31 P4155 \\\n", "node1 \n", "P5447 NaN NaN [Q55426051] [P5446] NaN NaN NaN NaN NaN \n", "P5448 NaN NaN [Q55426051] [P5446] NaN NaN NaN NaN NaN \n", "\n", "label P4680 P6607 \n", "node1 \n", "P5447 [Q46466783] NaN \n", "P5448 [Q46466783] NaN " ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires[dfItemRequires['P4680'].apply(lambda p: type(p) == list)]" ] }, { "cell_type": "code", "execution_count": 44, "id": "discrete-template", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P1010NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1045NaNNaN[Q20808382, Q28218485, Q3044918][P39]NaNNaNNaNNaNNaNNaNNaN
P1045NaNNaN[Q82955][P106]NaNNaNNaNNaNNaNNaNNaN
P1045NaNNaN[Q5][P31]NaN[Q21502408]NaNNaNNaNNaNNaN
P1045NaNNaN[Q142, Q71084][P27]NaNNaNNaNNaNNaNNaNNaN
....................................
P980NaNNaN[Q34][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P981NaNNaN[Q55][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P981NaNNaN[Q1852859][P31]NaNNaNNaNNaNNaNNaNNaN
P988NaNNaN[Q928][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P990[Q49678, Q853715]NaN[Q5][P31]NaNNaNNaNNaNNaNNaNNaN
\n", "

2540 rows × 11 columns

\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 \\\n", "node1 \n", "P1010 NaN NaN [Q794] [P17] \n", "P1045 NaN NaN [Q20808382, Q28218485, Q3044918] [P39] \n", "P1045 NaN NaN [Q82955] [P106] \n", "P1045 NaN NaN [Q5] [P31] \n", "P1045 NaN NaN [Q142, Q71084] [P27] \n", "... ... ... ... ... \n", "P980 NaN NaN [Q34] [P17] \n", "P981 NaN NaN [Q55] [P17] \n", "P981 NaN NaN [Q1852859] [P31] \n", "P988 NaN NaN [Q928] [P17] \n", "P990 [Q49678, Q853715] NaN [Q5] [P31] \n", "\n", "label P2308 P2316 P2916 P31 P4155 P4680 P6607 \n", "node1 \n", "P1010 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "P1045 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "... ... ... ... ... ... ... ... \n", "P980 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P981 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P981 NaN NaN NaN NaN NaN NaN NaN \n", "P988 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P990 NaN NaN NaN NaN NaN NaN NaN \n", "\n", "[2540 rows x 11 columns]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires[dfItemRequires['P2305'].apply(lambda p: type(p) == list)]" ] }, { "cell_type": "markdown", "id": "forced-christmas", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "markdown", "id": "acquired-floor", "metadata": {}, "source": [ "#### Version 1 - Mandatory + Suggestion + Normal" ] }, { "cell_type": "code", "execution_count": 29, "id": "turkish-establishment", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "28d37088e10e43daa81f2da30f5d8be3", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory + suggestion + 
normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved2/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 100 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved2/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk 
--debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + 
subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 30, "id": "peripheral-herald", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "534" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 31, "id": "incorporated-logistics", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fCnt" ] }, { 
"cell_type": "code", "execution_count": 123, "id": "welcome-welding", "metadata": {}, "outputs": [], "source": [ "# from tqdm.notebook import tqdm\n", "# import os.path\n", "# import os\n", "# folderName = 'codependencyConstraint'\n", "# for prop in tqdm(dfItemRequires.index.unique()):\n", "# for subFolderName in ['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal']:\n", "# if os.path.isfile(\"../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv\") and \\\n", "# os.path.isfile(\"../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv\"):\n", "# os.system(\"kgtk cat -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", "# ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", "# -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_w_exceptions.tsv\")" ] }, { "cell_type": "code", "execution_count": 32, "id": "optimum-blowing", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,28):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved2/checkViolations/codepConst_MSN_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "indoor-verse", "metadata": {}, "source": [ "#### Version 2 - Mandatory + Normal" ] }, { "cell_type": "code", "execution_count": 33, "id": "furnished-paradise", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2c4f963cc8324623abcb436adbc83b2b", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 2 
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory + normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved2/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 100 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved2/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", 
" --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + 
str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 34, "id": "searching-individual", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "475" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 44, "id": "silver-clarity", "metadata": {}, "outputs": [], "source": [ "# import os\n", 
"# for i in range(1,26):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved2/checkViolations/codepConst_MN_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "prescription-access", "metadata": {}, "source": [ "#### Version 3 - Mandatory" ] }, { "cell_type": "code", "execution_count": 35, "id": "married-porter", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "be961871162c4726aefe3f576c1abcbe", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved2/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " 
else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 100 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved2/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + 
\"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> 
../../propertiesSplitWRemoved2/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 36, "id": "according-blackberry", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "79" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 45, "id": "extraordinary-drawing", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,12):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved2/checkViolations/codepConst_M_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "subsequent-brown", "metadata": {}, "source": [ "#### Version 4 - Normal" ] }, { "cell_type": "code", "execution_count": 38, "id": "operational-migration", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1e24472c9c45421fb77d68bd305ccfa7", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved2/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # 
Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 100 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved2/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> 
../../propertiesSplitWRemoved2/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + 
\".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 39, "id": "harmful-binary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "424" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 46, "id": "advance-married", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,21):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved2/checkViolations/codepConst_N_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "ranging-journal", "metadata": {}, "source": [ "#### Version 5 - Suggestion" ] }, { "cell_type": "code", "execution_count": 41, "id": "missing-jordan", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "21d2dfa4582b40d09106a8adf878cdde", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " 
\n", " # Version 5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = suggestion\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved2/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 100 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved2/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop 
+\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 
'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 42, "id": "soviet-forth", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "97" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 46, "id": "racial-stationery", "metadata": {}, "outputs": [], "source": [ "# 
import os\n", "# for i in range(1,5):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved2/checkViolations/codepConst_S_Validator_new_3_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "structural-envelope", "metadata": {}, "source": [ "### Merge all correct/incorrect outputs" ] }, { "cell_type": "code", "execution_count": 17, "id": "joined-invention", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "57dcbdd4c8014c9288dbb92b331a05a6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# import os\n", "# from tqdm.notebook import tqdm\n", "\n", "# for folder in tqdm(iter(['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal'])):\n", "# folderPath = \"../../allConstraintsAnalysisWRemoved2/codependencyConstraint/\" + folder + \"/\"\n", "# correct_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".correct.\" in f, os.listdir(folderPath))])\n", "# incorrect_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".incorrect.\" in f, os.listdir(folderPath))])\n", "# # print(files_list)\n", "# os.system(\"{ kgtk cat -i \"+ correct_files_list + \" -o \"+folderPath+\"claims.all.correctSuperSet.tsv -v True; } 2> \"+folderPath+\"claims.all.correctSuperSet.log\")\n", "# os.system(\"{ kgtk cat -i \"+ incorrect_files_list + \" -o \"+folderPath+\"claims.all.incorrectSuperSet.tsv -v True; } 2> \"+folderPath+\"claims.all.incorrectSuperSet.log\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "stopped-bolivia", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "68395f72036a469fad8908d916303bcd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# import os\n", "# from tqdm.notebook import tqdm\n", "\n", "# for folder in 
tqdm(iter(['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal'])):\n", "# folderPath = \"../../allConstraintsAnalysisWRemoved2/codependencyConstraint_Final/\" + folder + \"/\"\n", "# correct_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".correct.\" in f, os.listdir(folderPath))])\n", "# incorrect_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".incorrect.\" in f, os.listdir(folderPath))])\n", "# # print(files_list)\n", "# os.system(\"{ kgtk cat -i \"+ correct_files_list + \" -o \"+folderPath+\"claims.all.correctSuperSet.tsv; } 2> \"+folderPath+\"claims.all.correctSuperSet.log\")\n", "# os.system(\"{ kgtk cat -i \"+ incorrect_files_list + \" -o \"+folderPath+\"claims.all.incorrectSuperSet.tsv; } 2> \"+folderPath+\"claims.all.incorrectSuperSet.log\")" ] }, { "cell_type": "code", "execution_count": null, "id": "criminal-central", "metadata": {}, "outputs": [], "source": [ "# for folder in tqdm(iter(['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal'])):\n", "# folderPath = \"../../allConstraintsAnalysisWRemoved2/codependencyConstraint/\" + folder + \"/\"\n", "# folderPathNew = \"../../allConstraintsAnalysisWRemoved2/codependencyConstraint_Final/\" + folder + \"/\"\n", "# os.system(f\"screen -dm kgtk ifnotexists -i {folderPathNew}claims.all.correctSuperSet.tsv --filter-on {folderPath}claims.all.correctSuperSet.tsv -o {folderPathNew}claims.all.correctSuperSet.diff.tsv\")\n", "# os.system(f\"screen -dm kgtk ifnotexists -i {folderPathNew}claims.all.incorrectSuperSet.tsv --filter-on {folderPath}claims.all.incorrectSuperSet.tsv -o {folderPathNew}claims.all.incorrectSuperSet.diff.tsv\")\n", " " ] }, { "cell_type": "markdown", "id": "homeless-pleasure", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 71, "id": "welcome-dependence", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d00113c7ab5a4ed7a7b582d4991877b2", 
"version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4a8045f2c85240ba92343d7ff646f249", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1206 [00:00= 3.539484)}/{len(codepConstDF1)}\")" ] }, { "cell_type": "markdown", "id": "greater-genetics", "metadata": {}, "source": [ "#### Version 2 - Mand Normal" ] }, { "cell_type": "code", "execution_count": null, "id": "constant-chance", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF2 = pd.DataFrame(codepConstViolations['Mand_Normal']).T" ] }, { "cell_type": "code", "execution_count": null, "id": "included-adjustment", "metadata": {}, "outputs": [], "source": [ "codepConstDF2" ] }, { "cell_type": "code", "execution_count": null, "id": "fundamental-knowing", "metadata": {}, "outputs": [], "source": [ "codepConstDF2['violation_ratio'] = codepConstDF2.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)" ] }, { "cell_type": "code", "execution_count": null, "id": "harmful-discipline", "metadata": {}, "outputs": [], "source": [ "codepConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": null, "id": "unlikely-chamber", "metadata": { "scrolled": false }, "outputs": [], "source": [ "codepConstDF2.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": null, "id": "violent-match", "metadata": { "scrolled": true }, "outputs": [], "source": [ "codepConstDF2['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": null, "id": "educational-thickness", "metadata": {}, "outputs": [], "source": [ "codepConstDF2['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 2 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": null, "id": "latin-mitchell", 
"metadata": { "scrolled": true }, "outputs": [], "source": [ "codepConstDF2[codepConstDF2['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 2 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": null, "id": "asian-forwarding", "metadata": {}, "outputs": [], "source": [ "# Threshold is the mean computed from the data. This ratio is incorrect / (correct + incorrect),\n", "# so it is bounded by 1 and the former hard-coded threshold 2.290915 could never be reached.\n", "print(f\"No. of properties whose violation ratio is greater than mean: {sum(codepConstDF2['violation_ratio'] >= codepConstDF2['violation_ratio'].mean())}/{len(codepConstDF2)}\")" ] }, { "cell_type": "markdown", "id": "destroyed-flash", "metadata": {}, "source": [ "#### Version 3 - Mand" ] }, { "cell_type": "code", "execution_count": null, "id": "consecutive-plenty", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF3 = pd.DataFrame(codepConstViolations['Mand']).T" ] }, { "cell_type": "code", "execution_count": null, "id": "digital-mileage", "metadata": {}, "outputs": [], "source": [ "codepConstDF3" ] }, { "cell_type": "code", "execution_count": null, "id": "formed-battle", "metadata": {}, "outputs": [], "source": [ "codepConstDF3['violation_ratio'] = codepConstDF3.apply(lambda p: p.incorrect / p.correct if p.correct != 0 else p.incorrect/100, axis=1)" ] }, { "cell_type": "code", "execution_count": null, "id": "numerous-construction", "metadata": {}, "outputs": [], "source": [ "codepConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": null, "id": "identified-marble", "metadata": {}, "outputs": [], "source": [ "codepConstDF3.loc['P1713']" ] }, { "cell_type": "code", "execution_count": null, "id": "established-mounting", "metadata": {}, "outputs": [], "source": [ "!head ../../allConstraintsAnalysisWRemoved2/codependencyConstraint_Final/Mand/claims.P1713.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": null, "id": "naval-functionality", "metadata": {}, "outputs": [], "source": [ "!cat 
../../allConstraintsAnalysisWRemoved2/codependencyConstraint/Mand/claims.P1713.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": null, "id": "imposed-bibliography", "metadata": { "scrolled": false }, "outputs": [], "source": [ "codepConstDF3.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": null, "id": "emotional-crown", "metadata": { "scrolled": true }, "outputs": [], "source": [ "codepConstDF3['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": null, "id": "certain-freeze", "metadata": {}, "outputs": [], "source": [ "codepConstDF3['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 3 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": null, "id": "cooperative-ownership", "metadata": { "scrolled": true }, "outputs": [], "source": [ "codepConstDF3[codepConstDF3['violation_ratio'] <= 0.0005].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 3 - Violation Ratios <= 0.0005\")" ] }, { "cell_type": "code", "execution_count": null, "id": "studied-inclusion", "metadata": {}, "outputs": [], "source": [ "print(f\"No. 
of properties whose violation ratio is greater than mean: {sum(codepConstDF3['violation_ratio'] >= codepConstDF3['violation_ratio'].mean())}/{len(codepConstDF3)}\")  # threshold is the mean computed from the data, not a hard-coded constant" ] }, { "cell_type": "markdown", "id": "protective-brazil", "metadata": {}, "source": [ "#### Version 4 - Normal" ] }, { "cell_type": "code", "execution_count": null, "id": "laughing-pressing", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF4 = pd.DataFrame(codepConstViolations['Normal']).T" ] }, { "cell_type": "code", "execution_count": null, "id": "loving-swift", "metadata": {}, "outputs": [], "source": [ "codepConstDF4" ] }, { "cell_type": "code", "execution_count": null, "id": "north-christian", "metadata": {}, "outputs": [], "source": [ "codepConstDF4['violation_ratio'] = codepConstDF4.apply(lambda p: p.incorrect / p.correct if p.correct != 0 else p.incorrect/100, axis=1)" ] }, { "cell_type": "code", "execution_count": null, "id": "closing-causing", "metadata": {}, "outputs": [], "source": [ "codepConstDF4.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": null, "id": "weighted-input", "metadata": {}, "outputs": [], "source": [ "# list(codepConstDF4.sort_values(by=['violation_ratio'],ascending=False).head(5).paths)" ] }, { "cell_type": "code", "execution_count": null, "id": "brief-effect", "metadata": { "scrolled": false }, "outputs": [], "source": [ "codepConstDF4.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": null, "id": "wireless-passenger", "metadata": { "scrolled": true }, "outputs": [], "source": [ "codepConstDF4['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": null, "id": "civilian-arnold", "metadata": {}, "outputs": [], "source": [ "codepConstDF4['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 4 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": null, "id": "threaded-cooler", "metadata": { 
"scrolled": true }, "outputs": [], "source": [ "codepConstDF4[codepConstDF4['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 4 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": null, "id": "olympic-charlotte", "metadata": {}, "outputs": [], "source": [ "# Threshold is the mean computed from the data instead of a hard-coded constant,\n", "# so the count stays correct when the underlying violation files change.\n", "print(f\"No. of properties whose violation ratio is greater than mean: {sum(codepConstDF4['violation_ratio'] >= codepConstDF4['violation_ratio'].mean())}/{len(codepConstDF4)}\")" ] }, { "cell_type": "markdown", "id": "published-affiliate", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": null, "id": "aggregate-conservative", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "from tqdm.notebook import tqdm\n", "\n", "\n", "def extractTimes(filename):\n", "    \"\"\"Return the `real` durations, in seconds, recorded in a `time` log file.\n", "\n", "    Only lines whose first tab-separated field is exactly `real` are parsed;\n", "    the second field is expected in the form `0m1.234s`. All other lines are skipped.\n", "    \"\"\"\n", "    times = []\n", "    with open(filename) as f:\n", "        for line in f:\n", "            fields = line.strip().split(\"\\t\")\n", "            # Match on the first field rather than a substring, so unrelated log lines\n", "            # that merely contain \"real\" do not break the parse below.\n", "            if len(fields) < 2 or fields[0] != \"real\":\n", "                continue\n", "            mins, sec = fields[1].split(\"m\")\n", "            times.append(60 * int(mins) + float(sec[:-1]))  # sec[:-1] drops the trailing 's'\n", "    return times\n", "\n", "\n", "times = []\n", "timesVersion = {\"MSN\": [], \"MN\": [], \"M\": [], \"N\": [], \"S\": []}\n", "filePath = '/data/wd-correctness/propertiesSplitWRemoved2/checkViolations/exec_logs/'\n", "for filename in tqdm(os.listdir(filePath)):\n", "    if filename.startswith(\"timeLog_codepConst_\"):\n", "        # The version tag is the third underscore-separated token of the file name.\n", "        ver = filename.split('_')[2]\n", "        tempTimes = extractTimes(os.path.join(filePath, filename))\n", "        times += tempTimes\n", "        # setdefault avoids a KeyError if a log carries a version tag not pre-listed above.\n", "        timesVersion.setdefault(ver, []).extend(tempTimes)\n", "print(pd.Series(times).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "hearing-treasury", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['MSN']).describe())" ] }, { "cell_type": "code", 
"execution_count": null, "id": "animal-vocabulary", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['MN']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "gentle-accessory", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['M']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "fresh-namibia", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['N']).describe())" ] }, { "cell_type": "markdown", "id": "industrial-parcel", "metadata": {}, "source": [ "## Symmetric Constraint (Q21510862)\n", "\n", "This constraint says that if a property `prop` carries this constraint, then for every statement `(node1)-[prop]->(node2)` the reverse statement `(node2)-[prop]->(node1)` must also be present. The only statements exempt from this rule are those whose `node1` is listed as an exception (P2303) on the constraint." ] }, { "cell_type": "markdown", "id": "silent-fundamentals", "metadata": {}, "source": [ "### Constraints File" ] }, { "cell_type": "code", "execution_count": 48, "id": "known-wednesday", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-13 18:58:46 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " AND graph_1_c1.\"node2\"=?\r\n", " PARAS: ['P2302', 'Q21510862']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->(:Q21510862)\" \\\n", " -o ../../constraintsOP/symmetricConstraint/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 49, "id": "legal-diamond", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "qualiDF = 
pd.read_csv(\"../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz\",sep='\\t')\n", "constDF = pd.read_csv(\"../../constraintsOP/symmetricConstraint/claims.constraints_list.tsv\",sep='\\t')" ] }, { "cell_type": "code", "execution_count": 50, "id": "exceptional-morris", "metadata": {}, "outputs": [], "source": [ "constDF2 = constDF.set_index('id').join(qualiDF.set_index('node1'),rsuffix='_qualifier').drop(columns=['id', 'node2;wikidatatype_qualifier', 'rank', 'node2', 'label', 'node2;wikidatatype']).rename(columns={'label_qualifier':'label', 'node2_qualifier': 'node2'})\n", "constDF2 = constDF2.reset_index()\n", "constDF2 = constDF2.rename(columns={'index':'id'})\n", "constDF2['label'] = constDF2.label.fillna(\"P2316\")\n", "constDF2['node2'] = constDF2.node2.fillna(\"Normal\")" ] }, { "cell_type": "code", "execution_count": 51, "id": "burning-involvement", "metadata": {}, "outputs": [], "source": [ "constDF2.to_csv(\"../../constraintsOP/symmetricConstraint/claims.constraints_all.tsv\",sep=\"\\t\",index=False)" ] }, { "cell_type": "code", "execution_count": 52, "id": "naval-identification", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/symmetricConstraint/claims.constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 53, "id": "considered-madison", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 54, "id": "alone-cattle", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2316', 'P2303'], dtype=object)" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 55, "id": "mighty-ordinary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2316 42\n", 
"P2303 3\n", "Name: label, dtype: int64" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 56, "id": "sensitive-alliance", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 57, "id": "tender-valley", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2316
node1id
P1322P1322-P2302-Q21510862-85dea891-0NaN[Normal]
P1327P1327-P2302-Q21510862-a3c3a094-0NaN[Normal]
P1382P1382-P2302-Q21510862-f6bcfecf-0NaN[Normal]
P1560P1560-P2302-Q21510862-fabecaeb-0NaN[Q21502408]
P1639P1639-P2302-Q21510862-384edcd4-0NaN[Q21502408]
\n", "
" ], "text/plain": [ "label P2303 P2316\n", "node1 id \n", "P1322 P1322-P2302-Q21510862-85dea891-0 NaN [Normal]\n", "P1327 P1327-P2302-Q21510862-a3c3a094-0 NaN [Normal]\n", "P1382 P1382-P2302-Q21510862-f6bcfecf-0 NaN [Normal]\n", "P1560 P1560-P2302-Q21510862-fabecaeb-0 NaN [Q21502408]\n", "P1639 P1639-P2302-Q21510862-384edcd4-0 NaN [Q21502408]" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 58, "id": "cellular-canal", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 59, "id": "desperate-poster", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2316
node1
P1322NaN[Normal]
P1327NaN[Normal]
P1382NaN[Normal]
P1560NaN[Q21502408]
P1639NaN[Q21502408]
\n", "
" ], "text/plain": [ "label P2303 P2316\n", "node1 \n", "P1322 NaN [Normal]\n", "P1327 NaN [Normal]\n", "P1382 NaN [Normal]\n", "P1560 NaN [Q21502408]\n", "P1639 NaN [Q21502408]" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "primary-netherlands", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 60, "id": "pointed-haven", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5ff09bb499d044ecaa4605a4ab390068", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tqdm.notebook import tqdm\n", "import os.path\n", "import os\n", "\n", "cnt = 0\n", "fCnt = 0\n", "\n", "folderName = 'symmetricConstraint'\n", "shellFileSuffix = 'symmConst_Validator_'\n", "graph_cache_prefix = 'symm_03'\n", "\n", "for row in tqdm(dfItemRequires.iterrows()):\n", "# try:\n", " prop = row[0]\n", " constraint = row[1]\n", " mandatory = []\n", " suggestion = []\n", " normal = []\n", " prop = str(prop)\n", " \n", " if type(constraint['P2316']) == list:\n", " if constraint['P2316'][0] == 'Q21502408':\n", " sfname = 'mandatory'\n", " elif constraint['P2316'][0] == 'Q62026391':\n", " sfname = 'suggestion'\n", " elif constraint['P2316'][0] == 'Normal':\n", " sfname = 'normal'\n", " else:\n", " sfname = 'normal'\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv\")):\n", " continue\n", " \n", " command = \"{ time ( kgtk --debug query -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " ../../propertiesSplitWRemoved2/claims.\"+ prop +\".copy2.tsv \\\n", " --match 'tsv: (node1)-[nodeProp]->(node2), copy2: (node2)-[]->(node1)' \"\n", " \n", " os.system(\"cp ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv ../../propertiesSplitWRemoved2/claims.\"+ 
prop +\".copy2.tsv\")\n", " \n", " if cnt % 60 == 0:\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved2/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " command\n", " if type(constraint['P2303']) != list: # Exceptions not present\n", " commandRest = \" --return 'nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " else:\n", " excptns = constraint['P2303']\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys 
node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".correct_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".correct_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " 
fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)" ] }, { "cell_type": "code", "execution_count": 61, "id": "polar-canada", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "39" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 62, "id": "virtual-disney", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,2):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved2/checkViolations/symmConst_Validator_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "coral-cheese", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 47, "id": "governmental-backup", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e13ba4b56db84a0f997467ec87fdcec4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c01a49837d8a448ab5a1f234a1214fe5", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/13 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P1639210525[../../allConstraintsAnalysisWRemoved2/symmetr...0.011737
P1560348815[../../allConstraintsAnalysisWRemoved2/symmetr...0.004282
P336418131[../../allConstraintsAnalysisWRemoved2/symmetr...0.000551
P2152800[../../allConstraintsAnalysisWRemoved2/symmetr...0.000000
P61852820[../../allConstraintsAnalysisWRemoved2/symmetr...0.000000
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P1639 2105 25 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P1560 3488 15 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P3364 1813 1 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P2152 80 0 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P6185 282 0 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "\n", " violation_ratio \n", "P1639 0.011737 \n", "P1560 0.004282 \n", "P3364 0.000551 \n", "P2152 0.000000 \n", "P6185 0.000000 " ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF1 = pd.DataFrame(symmConstViolations['mandatory']).T\n", "symmConstDF1['violation_ratio'] = symmConstDF1.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF1.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 53, "id": "gross-extraction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P27891052016590[../../allConstraintsAnalysisWRemoved2/symmetr...0.058949
P188953333824740[../../allConstraintsAnalysisWRemoved2/symmetr...0.044331
P1971808641737[../../allConstraintsAnalysisWRemoved2/symmetr...0.009513
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2789 105201 6590 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P1889 533338 24740 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P197 180864 1737 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "\n", " violation_ratio \n", "P2789 0.058949 \n", "P1889 0.044331 \n", "P197 0.009513 " ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF2 = pd.DataFrame(symmConstViolations['suggestion']).T\n", "symmConstDF2['violation_ratio'] = symmConstDF2.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 54, "id": "heavy-scout", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P518805[../../allConstraintsAnalysisWRemoved2/symmetr...1.000000
P597401[../../allConstraintsAnalysisWRemoved2/symmetr...1.000000
P17061284[../../allConstraintsAnalysisWRemoved2/symmetr...0.875000
P2652500836[../../allConstraintsAnalysisWRemoved2/symmetr...0.625749
P521424146[../../allConstraintsAnalysisWRemoved2/symmetr...0.256140
P684120917693437304[../../allConstraintsAnalysisWRemoved2/symmetr...0.221346
P30321743316[../../allConstraintsAnalysisWRemoved2/symmetr...0.153473
P1382110751657[../../allConstraintsAnalysisWRemoved2/symmetr...0.130145
P2293147361969[../../allConstraintsAnalysisWRemoved2/symmetr...0.117869
P13277954706[../../allConstraintsAnalysisWRemoved2/symmetr...0.081524
P4545464[../../allConstraintsAnalysisWRemoved2/symmetr...0.080000
P45111072790[../../allConstraintsAnalysisWRemoved2/symmetr...0.066599
P5306730382[../../allConstraintsAnalysisWRemoved2/symmetr...0.053712
P34032174112[../../allConstraintsAnalysisWRemoved2/symmetr...0.048994
P46026570612622[../../allConstraintsAnalysisWRemoved2/symmetr...0.045349
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5188 0 5 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P5974 0 1 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P1706 12 84 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P2652 500 836 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P521 424 146 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P684 12091769 3437304 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P3032 1743 316 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P1382 11075 1657 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P2293 14736 1969 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P1327 7954 706 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P4545 46 4 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P451 11072 790 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P530 6730 382 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P3403 2174 112 [../../allConstraintsAnalysisWRemoved2/symmetr... \n", "P460 265706 12622 [../../allConstraintsAnalysisWRemoved2/symmetr... 
\n", "\n", " violation_ratio \n", "P5188 1.000000 \n", "P5974 1.000000 \n", "P1706 0.875000 \n", "P2652 0.625749 \n", "P521 0.256140 \n", "P684 0.221346 \n", "P3032 0.153473 \n", "P1382 0.130145 \n", "P2293 0.117869 \n", "P1327 0.081524 \n", "P4545 0.080000 \n", "P451 0.066599 \n", "P530 0.053712 \n", "P3403 0.048994 \n", "P460 0.045349 " ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF3 = pd.DataFrame(symmConstViolations['normal']).T\n", "symmConstDF3['violation_ratio'] = symmConstDF3.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 55, "id": "sexual-blowing", "metadata": {}, "outputs": [], "source": [ "# !head ../../allConstraintsAnalysisWRemoved2/symmetricConstraint/normal/claims.P3032.incorrect.tsv\n", "\n" ] }, { "cell_type": "code", "execution_count": 56, "id": "legitimate-aspect", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Symmetric Normal Constraint - Violation Ratios')" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXgAAAEICAYAAABVv+9nAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAYlUlEQVR4nO3de7gdVX3G8e9LDhBCAgETaCHEAygiF1toEJWqKKjcsWot2KChQqq2lipWo9KCaBVrVaxPLVCwXOKFi5amgq1SianWgOFS5SIVIUAISAADBCIh8Osfa51ksj2XOefs2fucdd7P8+TJnuv6rT0z7549M9lRRGBmZuXZrNsFmJlZMxzwZmaFcsCbmRXKAW9mVigHvJlZoRzwZmaFcsAXRtI5kv6623W0m6TFkk7qdh3tNta2l6RvS3pHjfl6JYWknhG28xFJ549k2U6SNFvSGkmTul3LSEzIgJf0+5L+R9Jjkh6V9ENJB3S7rsFImifpB0PNFxHvioiPj2D9yyU9JGnryriTJC0e7rq6QdIeki6X9HDerj+R9P4mD8z8nh06mnUMZ3tJulDSJ0baVv4wubif8b8j6WlJ20fE4RFx0UjbGKDdgyWtqI6LiE9GRNs/sPNx8mwO5ccl/a+ko4ax/CbbNCLujYipEfFsu2vthAkX8JK2Ab4FfBHYHtgZ+BjwdDfraoc2hNkk4JQ21CFJHdu3JO0OXAfcB+wbEdsCfwjMAaZ1qo5+6hrR2W2DLgLeVP0Qz04AvhURj3ahpib8KCKmAtOBLwFflzS9qxV1S0RMqD+kg371ANO2AB4lhUTfuB2Ap4CZwMHACuCDwEPAA8AbgSOA/8vLfqSy7BnA5cBC4Angp8AewIfz8vcBr6/Mvy1wQV7v/cAnSKH7YuDXwLPAmr76gQuBfwKuBp4EDs3jPlFZ57HAzcDjwC+Awwbo+3JgQe7D9DzuJGBxZZ5XAD8GHst/v6IybTHwt8APgbXAC4AA3gP8PPf/48DuwP/kei4DtsjLb0f64F0F/Cq/ntWy/pMGqH0hcNUQ2/0Y4FZgdV7Xi1v6/gHgJ7lvlwKT87QZuZbV+b35b9KJ0SXAc7mva/I+0Zv7/E7gXmBJXsflwIN53UuAvSttb9hebNy/TmXj/nVinjYfeAZYl9v79xHu/3cAb68MTwJWAse2vs+5n6cB9+R6Lga2zdP6+tqTh08Ebs/b+S7gT/P4rfN79Fyuew2wE+nYWDja7dNP/+YBP6gMT8l1HpCHdwe+BzwCPAx8hY37+2DbtK+fOwGL8r5wJ3Bypa2XAstI+/Yvgc91Pe+6XUDHOwzb5I17EXA4sF3L9C8Bn64Mn9J3MOUDcD3wN8DmwMmkQPoq6Uxx77xz7JrnP4MUzG8AevIBcjfw0cryd1fa+lfg3HxQ7ABcXzlQNtlx87gL8w5/EOlgnMymgfHSPP11efrOwJ4DvC/LSR8Q36wsvyHgSd92fkU62+sBjs/Dz8vTF5NCbe88ffN8YPxbfs/3Jn1L+i9gN9KH2W3AO/LyzwPeTDogp5FC8cpKfYsZOOAfJAfhANP3IH0Avi7X9cF8cG5R6fv1pIN3e1JQvStP+xRwTl5uc+CVgKrvWaWd3tzni/M23CqP/5Pcpy2Bs4GbW7ZhNeDXA2fmto4gnVxs1zrvKPb/jwLXVIbfQNqHN299n3Pdd+btNTXvG5e09LUv+I4khaeAV+e696/0a0VLHWeQA34026ef/s0jHyekD68/I30o7pDHvSC3syXppG0JcHbrcdDPNu3r5xJSRkwGfje/d6/N034EnJBfTwVe1vW863YBXel0OiO+kHS2tJ70ibxjnnYgKaj6DuJlwFsrO+paYFIenpY3/oGVdd8AvLGyE3+3Mu1o0plB6/LTgR1JAbhVZf7jgWtbd9zK9AuBi/sZ1xcY5wKfr/meLCcF/D6kD4WZbBrwJwDXtyzzI2Befr0YOLNlegAHtbw3H6oMf7Z6cLUs+7vAryrDixk44J9
hgG8mefpfA5dVhjcjfUM6uNL3uZXpfweck1+fSfqQesFA71lluDf3ebdBapme59m2n+3Vt3/1VOZ/iBwUtCfgZ+f3a1Ye/grwhf7eZ9KH8Xsq016Ul+2hJfj6aedK4JRKvwYL+BFvn37anUc6plfnWteSj98B5n8jcFONbdoD7EL6Fj2tMv1TwIX59RLS5d4Zo9lG7fwz4a7BA0TE7RExLyJmkQJtJ9KZFRFxHens42BJe5I+8RdVFn8kNt5wWZv//mVl+lrSpzcDTHu4n+WnAs8nnb08IGm1pNWkgN5hiO7cN8i0XUiXZWqLiFtIlyQWtEzaifRVveoe0reCwWpp7X+/75WkKZLOlXSPpMdJB8v0mvcVHgF+e5Dpm9QeEc/lWqu1P1h5/RQbt+FnSGeT35F0l6TW96U/G94HSZMknSXpF7lfy/OkGQP1JSLWD1DLoPKTKWvyn3P6myci7iW9t3MlTSUF3G/ceM1at/k9pKDbsZ+2D5e0ND+0sJr07WOgPg7azjC3T3+WRsR00mW/RaRvXX117ijp65Luz9tj4TDrfDQinqiMqx4D7yR9G/mZpB8P5+ZuUyZkwFdFxM9IZ0b7VEZfBMwlnbVeERG/7kAp95HO4GdExPT8Z5uI2Luv1AGWG2h83zp3H0Etp5MuH1UPsJWkD6Gq2aQzrTq1DOVU0hnigRGxDfCqPF41lr2GdHlnIJvULkmkD7/7B1wii4gnIuLUiNiNdJ34/ZIO6Zs80GKV128j3Qc5lHRZqrevjKHaHmK9/dX6yUhPfEyNiHcNMutFpH37zaRLhDcMMF/rNp9NOjuufkgjaUvgG8Dfk74JTyfdF+rr41D7xYi3z2AiYg3wbuAESfvl0Z/M9eyb97O5bLotBqt1JbC9pOqN+w3HQET8PCKOJ52UfRq4op8b2h014QJe0p6STpU0Kw/vQroUsrQy20LgD0gbf6Czm7aKiAeA7wCflbSNpM0k7S7p1XmWXwKzJG0xjNVeAJwo6ZC8vp3zt5KharmTdCPrLyqjrwb2kPQ2ST2S/gjYi3S23w7TSGf0qyVtT/qQqet04BWSPiPptwAkvUDSwvz0xGXAkfl92Jz0YfI06WbvoCQdldcl0qWrZ0k34iBtk91q9Otp0reMKaSAGak67dXxDVIwfYwU9gP5GvA+Sbvms/1PApe2fMOA9HDClqTr0eslHQ68vqXu50nadoB2Rrx9hhLpyaDzSffNIG2PNcBjknYG/qplkQHf44i4L9f0KUmTJb2EdNa+EEDSXEkz8zeQ1Xmx5/pbV6dMuIAn3eU/ELhO0pOkYL+FtFMBGzbkjaRP8//uYG1vJx0st5FuYF7BxksP3yM9ZfCgpIfrrCwiric93fB5Ujh9n988Cx/ImaQbhX3regQ4ivQ+PUK6EXZURNSqpYazga1ITzYsBf6j7oIR8Qvg5aSz41slPUYKsWXAExFxB+nD+ot5/UcDR0fEuhqrfyHpG8Ia0j2HL0XEtXnap4DT8iW1Dwyw/MWkr/H3k7br0gHmq+MCYK/c3pUjXUlEPEl6f2aRrsEP5MukJ0uWkB4O+DXw3n7W9wTpZOAy0n77NiqXNfO35K8Bd+Xad2pZfjTbp46zgSNyIH8M2J90PFxFunFcNdQ2PZ60n60kPRRxekRck6cdRtr/1gBfAI6LiLX9rKNj+m4kWgtJXwZWRsRp3a7FzGwkxto/xBgTJPUCbwL2G2JWM7MxayJeohmUpI+TLtl8JiLu7nY9ZmYj5Us0ZmaF8hm8mVmhxtQ1+BkzZkRvb2+3yzAzGzduuOGGhyNiZn/TxlTA9/b2smzZsm6XYWY2bkhq/RfmG/gSjZlZoRzwZmaFcsCbmRXKAW9mVigHvJlZoRzwZmaFcsCbmRXKAW9mVigHvJlZocbUv2Qdjd4FV214vfysI7tYiZnZ2OAzeDOzQjngzcwK5YA3MyuUA97MrFAOeDOzQjngzcwK5YA3MyuUA97MrFAOeDOzQjngzcwK5YA
3MyuUA97MrFAOeDOzQjngzcwK5YA3MyuUA97MrFAOeDOzQjngzcwK5YA3MytUowEv6X2SbpV0i6SvSZrcZHtmZrZRYwEvaWfgL4A5EbEPMAk4rqn2zMxsU01foukBtpLUA0wBVjbcnpmZZY0FfETcD/w9cC/wAPBYRHyndT5J8yUtk7Rs1apVTZVjZjbhNHmJZjvgWGBXYCdga0lzW+eLiPMiYk5EzJk5c2ZT5ZiZTThNXqI5FLg7IlZFxDPAN4FXNNiemZlVNBnw9wIvkzRFkoBDgNsbbM/MzCqavAZ/HXAFcCPw09zWeU21Z2Zmm+ppcuURcTpwepNtmJlZ//wvWc3MCuWANzMrlAPezKxQDngzs0I54M3MCuWANzMrlAPezKxQDngzs0I54M3MCuWANzMrlAPezKxQDngzs0I54M3MCuWANzMrlAPezKxQDngzs0I54M3MCuWANzMrlAPezKxQDngzs0I54M3MCuWANzMrlAPezKxQDngzs0I54M3MCuWANzMrlAPezKxQDngzs0I54M3MCuWANzMrlAPezKxQDngzs0I54M3MCuWANzMrlAPezKxQDngzs0I54M3MCtVowEuaLukKST+TdLuklzfZnpmZbdTT8Pq/APxHRLxF0hbAlIbbMzOzrLGAl7Qt8CpgHkBErAPWNdWemZltqslLNLsCq4B/kXSTpPMlbd06k6T5kpZJWrZq1aoGyzEzm1iaDPgeYH/gnyJiP+BJYEHrTBFxXkTMiYg5M2fObLAcM7OJpcmAXwGsiIjr8vAVpMA3M7MOaCzgI+JB4D5JL8qjDgFua6o9MzPbVNNP0bwX+Ep+guYu4MSG2zMzs6zRgI+Im4E5TbZhZmb9q3WJRtK+TRdiZmbtVfca/JckXS/pPfn5djMzG+NqBXxEvBL4Y2AX4AZJX5X0ukYrMzOzUan9FE1E/Bw4DfgQ8GrgH/JvzLypqeLMzGzk6l6Df4mkzwO3A68Fjo6IF+fXn2+wPjMzG6G6T9F8ETgf+EhErO0bGRErJZ3WSGVmZjYqdQP+SGBtRDwLIGkzYHJEPBURlzRWnZmZjVjda/DXAFtVhqfkcWZmNkbVDfjJEbGmbyC/9m+7m5mNYXUD/klJG34oTNLvAWsHmd/MzLqs7jX4vwQul7QSEPBbwB81VZSZmY1erYCPiB9L2hPo+2XIOyLimebKMjOz0RrOj40dAPTmZfaXRERc3EhVZmY2arUCXtIlwO7AzcCzeXQADngzszGq7hn8HGCviIgmizEzs/ap+xTNLaQbq2ZmNk7UPYOfAdwm6Xrg6b6REXFMI1WZmdmo1Q34M5oswszM2q/uY5Lfl/R84IURcY2kKcCkZkszM7PRqPtzwScDVwDn5lE7A1c2VJOZmbVB3ZusfwYcBDwOG/7zjx2aKsrMzEavbsA/HRHr+gYk9ZCegzczszGqbsB/X9JHgK3y/8V6OfDvzZVlZmajVTfgFwCrgJ8CfwpcTfr/Wc3MbIyq+xTNc8A/5z9mZjYO1P0tmrvp55p7ROzW9orMzKwthvNbNH0mA38IbN/+cszMrF1qXYOPiEcqf+6PiLNJ/xG3mZmNUXUv0exfGdyMdEY/nN+SNzOzDqsb0p+tvF4PLAfe2vZqzMysbeo+RfOapgsxM7P2qnuJ5v2DTY+Iz7WnHDMza5fhPEVzALAoDx8NXA/8vImizMxs9OoG/Cxg/4h4AkDSGcBVETG3qcLMzGx06v5UwY7AusrwujzOzMzGqLpn8BcD10v61zz8RuCiRioyM7O2qPsUzd9K+jbwyjzqxIi4qbmyzMxstOpeogGYAjweEV8AVkjatc5CkiZJuknSt0ZUoZmZjUjd/7LvdOBDwIfzqM2BhTXbOAW4ffilmZnZaNQ9g/8D4BjgSYCIWAlMG2ohSbNIv1lz/kgLNDOzkal7k3VdRISkAJC0dc3lzgY+yCAfBpLmA/MBZs+eXXO19fUuuGrD6+Vn+ffRzGziqHsGf5mkc4H
pkk4GrmGI//xD0lHAQxFxw2DzRcR5ETEnIubMnDmzZjlmZjaUIc/gJQm4FNgTeBx4EfA3EfHdIRY9CDhG0hGk35DfRtJC/+MoM7POGDLg86WZqyNiX2CoUK8u92HyTVlJBwMfcLibmXVO3Us0N0o6oNFKzMysrereZD0QmCtpOelJGpFO7l9SZ+GIWAwsHkF9ZmY2QoMGvKTZEXEv8IYO1WNmZm0y1Bn8laRfkbxH0jci4s0dqMnMzNpgqGvwqrzerclCzMysvYYK+BjgtZmZjXFDXaL5HUmPk87kt8qvYeNN1m0arc7MzEZs0ICPiEmdKsTMzNprOD8XbGZm44gD3sysUA54M7NCOeDNzArlgDczK5QD3sysUA54M7NCOeDNzArlgDczK5QD3sysUA54M7NCOeDNzArlgDczK5QD3sysUA54M7NCOeDNzArlgDczK5QD3sysUA54M7NCOeDNzArlgDczK5QD3sysUA54M7NCOeDNzArlgDczK5QD3sysUA54M7NCOeDNzArlgDczK5QD3sysUA54M7NCNRbwknaRdK2k2yTdKumUptoyM7Pf1NPgutcDp0bEjZKmATdI+m5E3NZgm2ZmljV2Bh8RD0TEjfn1E8DtwM5NtWdmZptq8gx+A0m9wH7Adf1Mmw/MB5g9e3Zb2utdcNWw5ll+1pFtn9/MbDCdyJTGb7JKmgp8A/jLiHi8dXpEnBcRcyJizsyZM5sux8xswmg04CVtTgr3r0TEN5tsy8zMNtXkUzQCLgBuj4jPNdWOmZn1r8kz+IOAE4DXSro5/zmiwfbMzKyisZusEfEDQE2t38zMBud/yWpmVigHvJlZoRzwZmaFcsCbmRXKAW9mVigHvJlZoRzwZmaFcsCbmRXKAW9mVigHvJlZoRzwZmaFcsCbmRXKAW9mVigHvJlZoRzwZmaFcsCbmRXKAW9mVqjG/kensah3wVXDGr/8rCNHvP7qssMdP1p11ttU29YZrfust+HQJuI+7zN4M7NCOeDNzArlgDczK5QD3sysUA54M7NCOeDNzArlgDczK5QD3sysUA54M7NCOeDNzArlgDczK5QD3sysUA54M7NCOeDNzArlgDczK5QD3sysUA54M7NCOeDNzArlgDczK1SjAS/pMEl3SLpT0oIm2zIzs001FvCSJgH/CBwO7AUcL2mvptozM7NNNXkG/1Lgzoi4KyLWAV8Hjm2wPTMzq1BENLNi6S3AYRFxUh4+ATgwIv68Zb75wPw8+CLgjhE2OQN4eITLjlfuc/kmWn/BfR6u50fEzP4m9Iy8nvaIiPOA80a7HknLImJOG0oaN9zn8k20/oL73E5NXqK5H9ilMjwrjzMzsw5oMuB/DLxQ0q6StgCOAxY12J6ZmVU0dokmItZL+nPgP4FJwJcj4tam2qMNl3nGIfe5fBOtv+A+t01jN1nNzKy7/C9ZzcwK5YA3MyvUuAv4oX7+QNKWki7N06+T1NuFMtumRn/fL+k2ST+R9F+Snt+NOtup7k9cSHqzpJA07h+pq9NnSW/N2/pWSV/tdI3tVmPfni3pWkk35f37iG7U2S6SvizpIUm3DDBdkv4hvx8/kbT/qBuNiHHzh3Sz9hfAbsAWwP8Ce7XM8x7gnPz6OODSbtfdcH9fA0zJr989nvtbt895vmnAEmApMKfbdXdgO78QuAnYLg/v0O26O9Dn84B359d7Acu7Xfco+/wqYH/glgGmHwF8GxDwMuC60bY53s7g6/z8wbHARfn1FcAhktTBGttpyP5GxLUR8VQeXEr69wbjWd2fuPg48Gng150sriF1+nwy8I8R8SuAiHiowzW2W50+B7BNfr0tsLKD9bVdRCwBHh1klmOBiyNZCkyX9NujaXO8BfzOwH2V4RV5XL/zRMR64DHgeR2prv3q9LfqnaQzgPFsyD7nr667RMRVnSysQXW28x7AHpJ+KGmppMM6Vl0z6vT5DGCupBXA1cB7O1Na1wz3eB9S13+qwNpD0lx
gDvDqbtfSJEmbAZ8D5nW5lE7rIV2mOZj0LW2JpH0jYnU3i2rY8cCFEfFZSS8HLpG0T0Q81+3CxovxdgZf5+cPNswjqYf01e6RjlTXfrV+7kHSocBHgWMi4ukO1daUofo8DdgHWCxpOela5aJxfqO1znZeASyKiGci4m7g/0iBP17V6fM7gcsAIuJHwGTSj3KVqu0/7zLeAr7Ozx8sAt6RX78F+F7kOxjj0JD9lbQfcC4p3Mf7dVkYos8R8VhEzIiI3ojoJd13OCYilnWn3Laos19fSTp7R9IM0iWbuzpYY7vV6fO9wCEAkl5MCvhVHa2ysxYBb89P07wMeCwiHhjNCsfVJZoY4OcPJJ0JLIuIRcAFpK9yd5JuaBzXvYpHp2Z/PwNMBS7P95LvjYhjulb0KNXsc1Fq9vk/gddLug14FviriBiv30zr9vlU4J8lvY90w3XeOD5ZQ9LXSB/SM/J9hdOBzQEi4hzSfYYjgDuBp4ATR93mOH6/zMxsEOPtEo2ZmdXkgDczK5QD3sysUA54M7NCOeDNzArlgDczK5QD3sysUP8Pgt2N3c4g8xwAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "symmConstDF3.sort_values(by=['violation_ratio'],ascending=False)['violation_ratio'].plot.hist(bins=100).set_title(\"Symmetric Normal Constraint - Violation Ratios\")" ] }, { "cell_type": "markdown", "id": "unlikely-sewing", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 11, "id": "southern-reasoning", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "01675fcd83284c8ab2aa683f43fef458", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/108 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for symmetric constraint checks\")" ] }, { "cell_type": "markdown", "id": "informed-animal", "metadata": {}, "source": [ "## Inverse Constraint (Q21510855)\n", "\n", "This constraint says that if a property `prop` carries this constraint and names an inverse property `invProp` (qualifier P2306), then for every statement `(node1)-[prop]->(node2)` the inverse statement `(node2)-[invProp]->(node1)` must also be present, apart from the listed exceptions (qualifier P2303)." ] }, { "cell_type": "markdown", "id": "dramatic-manchester", "metadata": {}, "source": [ "### Constraints File" ] }, { "cell_type": "code", "execution_count": 63, "id": "leading-server", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-13 19:00:13 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " AND graph_1_c1.\"node2\"=?\r\n", " PARAS: ['P2302', 'Q21510855']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " 
--match \"p: (nodeProp1)-[nodePropEdge:P2302]->(:Q21510855)\" \\\n", " -o ../../constraintsOP/inverseConstraint/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 64, "id": "offshore-sudan", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "P1026-P2302-Q21510855-adc83b86-0\tP1026\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1029-P2302-Q21510855-6b55e057-0\tP1029\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P115-P2302-Q21510855-f7aa0b78-0\tP115\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1151-P2302-Q21510855-0d9aa9c6-0\tP1151\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1204-P2302-Q21510855-e3d53bb6-0\tP1204\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1283-P2302-Q21510855-0e7699bb-0\tP1283\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1308-P2302-Q21510855-2aba96b7-0\tP1308\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1365-P2302-Q21510855-c809b758-0\tP1365\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1366-P2302-Q21510855-eee12ef8-0\tP1366\tP2302\tQ21510855\tnormal\twikibase-item\r\n" ] } ], "source": [ "!head ../../constraintsOP/inverseConstraint/claims.constraints_list.tsv" ] }, { "cell_type": "code", "execution_count": 65, "id": "received-colonial", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "qualiDF = pd.read_csv(\"../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz\",sep='\\t')\n", "constDF = pd.read_csv(\"../../constraintsOP/inverseConstraint/claims.constraints_list.tsv\",sep='\\t')" ] }, { "cell_type": "code", "execution_count": 66, "id": "overall-expense", "metadata": {}, "outputs": [], "source": [ "constDF2 = constDF.set_index('id').join(qualiDF.set_index('node1'),rsuffix='_qualifier').drop(columns=['id', 'node2;wikidatatype_qualifier', 'rank', 'node2', 'label', 
'node2;wikidatatype']).rename(columns={'label_qualifier':'label', 'node2_qualifier': 'node2'})\n", "constDF2 = constDF2.reset_index()\n", "constDF2 = constDF2.rename(columns={'index':'id'})\n", "constDF2['label'] = constDF2.label.fillna(\"P2316\")\n", "constDF2['node2'] = constDF2.node2.fillna(\"Normal\")" ] }, { "cell_type": "code", "execution_count": 67, "id": "valid-throat", "metadata": {}, "outputs": [], "source": [ "constDF2.to_csv(\"../../constraintsOP/inverseConstraint/claims.constraints_all.tsv\",sep=\"\\t\",index=False)" ] }, { "cell_type": "code", "execution_count": 68, "id": "focused-pennsylvania", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/inverseConstraint/claims.constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 69, "id": "moved-rental", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 70, "id": "attached-rings", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2306', 'P2316', 'P4155', 'P2303'], dtype=object)" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 71, "id": "loving-mileage", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2306 110\n", "P2316 10\n", "P2303 2\n", "P4155 1\n", "Name: label, dtype: int64" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 72, "id": "local-forty", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = 
dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 73, "id": "pressed-upset", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2306P2316P4155
node1id
P1026P1026-P2302-Q21510855-adc83b86-0NaN[P50]NaNNaN
P1029P1029-P2302-Q21510855-6b55e057-0NaN[P5096]NaNNaN
P115P115-P2302-Q21510855-f7aa0b78-0NaN[P466]NaNNaN
P1151P1151-P2302-Q21510855-0d9aa9c6-0NaN[P1204][Q21502408]NaN
P1204P1204-P2302-Q21510855-e3d53bb6-0NaN[P1151]NaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2306 P2316 P4155\n", "node1 id \n", "P1026 P1026-P2302-Q21510855-adc83b86-0 NaN [P50] NaN NaN\n", "P1029 P1029-P2302-Q21510855-6b55e057-0 NaN [P5096] NaN NaN\n", "P115 P115-P2302-Q21510855-f7aa0b78-0 NaN [P466] NaN NaN\n", "P1151 P1151-P2302-Q21510855-0d9aa9c6-0 NaN [P1204] [Q21502408] NaN\n", "P1204 P1204-P2302-Q21510855-e3d53bb6-0 NaN [P1151] NaN NaN" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 74, "id": "extra-stomach", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 75, "id": "seeing-marine", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2306P2316P4155
node1
P1026NaN[P50]NaNNaN
P1029NaN[P5096]NaNNaN
P115NaN[P466]NaNNaN
P1151NaN[P1204][Q21502408]NaN
P1204NaN[P1151]NaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2306 P2316 P4155\n", "node1 \n", "P1026 NaN [P50] NaN NaN\n", "P1029 NaN [P5096] NaN NaN\n", "P115 NaN [P466] NaN NaN\n", "P1151 NaN [P1204] [Q21502408] NaN\n", "P1204 NaN [P1151] NaN NaN" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "composite-cutting", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 76, "id": "acoustic-belarus", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4db7288263d84bb0b8b61c4e3345a56c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tqdm.notebook import tqdm\n", "import os.path\n", "import os\n", "\n", "cnt = 0\n", "fCnt = 0\n", "fOP = None\n", "\n", "folderName = 'inverseConstraint_Final'\n", "shellFileSuffix = 'invConst_Validator_new3_'\n", "graph_cache_file_prefix = \"inv_4_\"\n", "\n", "for prop, constraint in tqdm(dfItemRequires.iterrows()):\n", "# try:\n", " \n", " if type(constraint['P2316']) == list:\n", " if constraint['P2316'][0] == 'Q21502408':\n", " subFolderName = \"mandatory\"\n", " elif constraint['P2316'][0] == 'Q62026391':\n", " subFolderName = \"suggestion\"\n", " else:\n", " subFolderName = \"normal\"\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv\")):\n", " continue\n", " \n", " prop2 = constraint['P2306']\n", "\n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", "\n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved2/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " if cnt % 40 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved2/checkViolations/\" + 
shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " \n", " command = \"{ time ( kgtk --debug query -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " ../../propertiesSplitWRemoved2/claims.\"+ prop2 +\".tsv \\\n", " --match '\"+ \\\n", " f\"{prop}: (node1)-[nodeProp]->(node2), {prop2}: (node2)-[]->(node1)' \"\n", "\n", " if type(constraint['P2303']) != list: # Exceptions not present\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " else:\n", " excptns = set(constraint['P2303'])\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv \\\n", " --filter-on 
../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", "# print(command) \n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop 
+\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved2/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved2/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 77, "id": "large-climb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "110" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 78, "id": "involved-vietnamese", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,7):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved2/checkViolations/invConst_Validator_new3_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "retired-audio", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 57, "id": "specified-evanescence", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a1a12be02d794481802d7761a975afcc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f69d40508fec4092844cf5e53811c7c3", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/12 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P267383967[../../allConstraintsAnalysisWRemoved2/inverse...0.073951
P41472868[../../allConstraintsAnalysisWRemoved2/inverse...0.027211
P41492864[../../allConstraintsAnalysisWRemoved2/inverse...0.013793
P2033187925[../../allConstraintsAnalysisWRemoved2/inverse...0.013130
P450177922[../../allConstraintsAnalysisWRemoved2/inverse...0.012215
P1151180317[../../allConstraintsAnalysisWRemoved2/inverse...0.009341
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P2673 839 67 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P4147 286 8 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P4149 286 4 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P2033 1879 25 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P450 1779 22 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P1151 1803 17 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "\n", " violation_ratio \n", "P2673 0.073951 \n", "P4147 0.027211 \n", "P4149 0.013793 \n", "P2033 0.013130 \n", "P450 0.012215 \n", "P1151 0.009341 " ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF1 = pd.DataFrame(invConstViolations['mandatory']).T\n", "invConstDF1['violation_ratio'] = invConstDF1.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF1.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 63, "id": "valid-symposium", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P143436775003[../../allConstraintsAnalysisWRemoved2/inverse...0.576382
P155103664753103[../../allConstraintsAnalysisWRemoved2/inverse...0.048730
P156103663640868[../../allConstraintsAnalysisWRemoved2/inverse...0.037928
P62974030240[../../allConstraintsAnalysisWRemoved2/inverse...0.003231
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1434 3677 5003 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P155 1036647 53103 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P156 1036636 40868 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P629 74030 240 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "\n", " violation_ratio \n", "P1434 0.576382 \n", "P155 0.048730 \n", "P156 0.037928 \n", "P629 0.003231 " ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF2 = pd.DataFrame(invConstViolations['suggestion']).T\n", "invConstDF2['violation_ratio'] = invConstDF2.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 64, "id": "resident-mustang", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P160513190[../../allConstraintsAnalysisWRemoved2/inverse...0.935961
P34486054575[../../allConstraintsAnalysisWRemoved2/inverse...0.883205
P92615[../../allConstraintsAnalysisWRemoved2/inverse...0.833333
P92515[../../allConstraintsAnalysisWRemoved2/inverse...0.833333
P10294902397[../../allConstraintsAnalysisWRemoved2/inverse...0.830274
P115694824721[../../allConstraintsAnalysisWRemoved2/inverse...0.780606
P51342354[../../allConstraintsAnalysisWRemoved2/inverse...0.701299
P38161427[../../allConstraintsAnalysisWRemoved2/inverse...0.658537
P128314052423[../../allConstraintsAnalysisWRemoved2/inverse...0.632968
P8625915[../../allConstraintsAnalysisWRemoved2/inverse...0.625000
P51328190[../../allConstraintsAnalysisWRemoved2/inverse...0.526316
P42527412938[../../allConstraintsAnalysisWRemoved2/inverse...0.517345
P2512221159[../../allConstraintsAnalysisWRemoved2/inverse...0.418421
P167764[../../allConstraintsAnalysisWRemoved2/inverse...0.400000
P25781111622[../../allConstraintsAnalysisWRemoved2/inverse...0.358915
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1605 13 190 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P3448 605 4575 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P926 1 5 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P925 1 5 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P1029 490 2397 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P115 6948 24721 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P5134 23 54 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P3816 14 27 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P1283 1405 2423 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P8625 9 15 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P5132 81 90 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P425 2741 2938 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P2512 221 159 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P1677 6 4 [../../allConstraintsAnalysisWRemoved2/inverse... \n", "P2578 1111 622 [../../allConstraintsAnalysisWRemoved2/inverse... 
\n", "\n", " violation_ratio \n", "P1605 0.935961 \n", "P3448 0.883205 \n", "P926 0.833333 \n", "P925 0.833333 \n", "P1029 0.830274 \n", "P115 0.780606 \n", "P5134 0.701299 \n", "P3816 0.658537 \n", "P1283 0.632968 \n", "P8625 0.625000 \n", "P5132 0.526316 \n", "P425 0.517345 \n", "P2512 0.418421 \n", "P1677 0.400000 \n", "P2578 0.358915 " ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One row per property; columns are the per-property entries of invConstViolations['normal'].\n", "invConstDF3 = pd.DataFrame(invConstViolations['normal']).T\n", "# Vectorised ratio: a property with no checked statements yields NaN instead of a 0/0 error.\n", "checkedCnt = (invConstDF3['correct'] + invConstDF3['incorrect']).astype(float)\n", "invConstDF3['violation_ratio'] = invConstDF3['incorrect'].astype(float) / checkedCnt.where(checkedCnt > 0)\n", "invConstDF3.sort_values(by='violation_ratio', ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 65, "id": "dietary-venue", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "head: cannot open ‘../../allConstraintsAnalysisWRemoved2/inverseConstraint/normal/claims.P925.incorrect.tsv’ for reading: No such file or directory\r\n" ] } ], "source": [ "!head ../../allConstraintsAnalysisWRemoved2/inverseConstraint/normal/claims.P925.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 66, "id": "entire-gauge", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Symmetric Normal Constraint - Violation Ratios')" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX4AAAEICAYAAABYoZ8gAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAYuUlEQVR4nO3deZhddX3H8feHhNUEAmRI2eIAghhAkQawLhWLCzuodcEGgbK4tlqwGpEWRAtYy2J9SoEKTwIosqgYBTeQGLUCBkVlkYIQIARIWAIJRjDw7R+/3ySHy9yZk8mcezPz+7yeZ545+/me3znnc889d1NEYGZm5Vir2wWYmVlnOfjNzArj4DczK4yD38ysMA5+M7PCOPjNzArj4C+EpHMl/Uu36xhukmZLOrrbdQy3NW1/SfqepMNrTNcrKSSNHeJ6TpD0laHM20mSJktaKmlMt2sZCgd/haTXS/pfSU9KelzSzyXt3u26BiLpCEk/G2y6iPhgRHxuCMufJ2mhpJdUhh0tafaqLqsbJO0g6QpJj+b9+ltJxzV5wuY2e/PqLGNV9pekGZI+P9R15QeZi/oZ/ipJz0jaJCL2jYiZQ11Hm/XuJWl+dVhEnBoRw/5Ans+T53JYPyXpN5IOWIX5X7BPI+L+iBgXEc8Nd62d4ODPJG0IfBf4MrAJsCXwWeCZbtY1HIYh5MYAHxuGOiSpY8ecpO2AG4EHgF0iYiPgXcBUYHyn6uinriFdDTdoJvCO6oN7dhjw3Yh4vAs1NeEXETEOmACcA3xd0oSuVtQtEeG/9OnlqcDiNuPWAR4nhUffsM2APwI9wF7AfOCTwELgIeAQYD/g//K8J1TmPRm4ArgEWAL8DtgB+HSe/wHgrZXpNwIuyMt9EPg8KYxfAfwJeA5Y2lc/MAP4b+Aa4GngzXnY5yvLPBi4BXgK+AOwT5ttnwdMz9swIQ87Gphdmea1wC+BJ/P/11bGzQb+Dfg5sAx4GRDAh4G78vZ/DtgO+N9cz+XAOnn+jUkPyIuAJ3L3Vi3LP7pN7ZcAVw+y3w8CbgMW52W9omXbPwH8Nm/bZcB6edzEXMvi3DY/JV1IXQw8n7d1aT4mevM2HwXcD8zJy7gCeDgvew6wU2XdK/YXK4+v41l5fB2Zxx0L/Bl4Nq/vO0M8/u8E3l/pHwMsAA5ubee8nScC9+V6LgI2yuP6tnVs7j8SuCPv53uAD+ThL8lt9HyueymwBencuGR1908/23cE8LNK/wa5zt1z/3bAj4HHgEeBr7LyeB9on/Zt5xbArHws3A0cU1nXHsBc0rH9CHBm1/Ou2wWsKX/AhnmnzwT2BTZuGX8O8IVK/8f6TrJ8Yi4H/hVYGziGFFRfI11Z7pQPmm3y9CeTAvttwNh84twLfKYy/72VdX0LOC+fLJsBN1VOoBcc0HnYjHwivI50kq7HC4Nkjzz+LXn8lsCObdplHumB45uV+VcEP+nZ0ROkq8OxwKG5f9M8fjYp7HbK49fOJ8y3c5vvRHpWdR2wLelB7nbg8Dz/psA7SSfqeFJYXlWpbzbtg/9hckC2Gb8D6YHxLbmuT+aTdp3Ktt9EOqk3IQXYB/O404Bz83xrA28AVG2zynp68zZflPfh+nn43+dtWhc4G7ilZR9Wg385cEpe136ki46NW6ddjeP/M8C1lf63kY7htVvbOdd9d95f4/KxcXHLtvYF4v6kUBXwxlz3bpXtmt9Sx8nk4F+d/dPP9h1BPk9ID2ofIT1YbpaHvSyvZ13Sxdwc4OzW86Cffdq3nXNIGbEesGtuu7/J434BHJa7xwGv6XredbuANemPdAU9g3R1tZz0CD4pj9uTFGB9J/dc4N2VA3gZMCb3j88HxZ6VZd8MHFI5uH9UGXcg6Uqidf4JwCRSMK5fmf5Q4PrWA7oyfgZwUT/D+oLkPOCsmm0yjxT8O5MeLHp4YfAfBtzUMs8vgCNy92zglJbxAbyupW0+Vek/o3rStcy7K/BEpX827YP/z7R5JpP
H/wtweaV/LdIzqr0q2z6tMv7fgXNz9ymkB6+XtWuzSn9v3uZtB6hlQp5mo372V9/xNbYy/UJygDA8wT85t9dWuf+rwJf6a2fSg/SHK+NenucdS0sg9rOeq4CPVbZroOAf8v7pZ71HkM7pxbnWZeTzt830hwC/rrFPxwJbk551j6+MPw2YkbvnkG4bT1ydfTScf77HXxERd0TEERGxFSnotiBdiRERN5KuVvaStCPpCmFWZfbHYuULPcvy/0cq45eRHu1pM+7RfuYfB7yUdLXzkKTFkhaTgnuzQTbngQHGbU26vVNbRNxKurUxvWXUFqSn/FX3kZ5FDFRL6/b321aSNpB0nqT7JD1FOokm1Hzd4jFg8wHGv6D2iHg+11qt/eFK9x9ZuQ+/SLr6/KGkeyS1tkt/VrSDpDGSTpf0h7xd8/Koie22JSKWt6llQPmdMkvz37n9TRMR95PadpqkcaTge9ELvlnrPr+PFICT+ln3vpJuyG+WWEx6ttJuGwdczyrun/7cEBETSLcPZ5GepfXVOUnS1yU9mPfHJatY5+MRsaQyrHoOHEV69vJ7Sb9clReVm+LgbyMifk+6ktq5MngmMI10lXtlRPypA6U8QLrinxgRE/LfhhGxU1+pbeZrN7xvmdsNoZaTSLehqifeAtKDU9Vk0pVZnVoGczzpinLPiNgQ+Os8XDXmvZZ0m6idF9QuSaQHxQfbzpFFxJKIOD4itiXdhz5O0t59o9vNVul+H+l1ljeTbm/19pUx2LoHWW5/tZ4a6R0o4yLigwNMOpN0bL+TdKvx5jbTte7zyaSr6eqDN5LWBb4B/AfpmfME0utOfds42HEx5P0zkIhYCnwIOEzSq/PgU3M9u+TjbBov3BcD1boA2ERS9Q0DK86BiLgrIg4lXax9AbiynxfSO8rBn0naUdLxkrbK/VuTbqncUJnsEuDtpIOi3dXQsIqIh4AfAmdI2lDSWpK2k/TGPMkjwFaS1lmFxV4AHClp77y8LfOzmMFquZv0Ato/VgZfA+wg6X2Sxkp6DzCF9OxgOIwnPQNYLGkT0oNPXScBr5X0RUl/ASDpZZIuye/muBzYP7fD2qQHmWdILzIPSNIBeVki3QJ7jvQCIKR9sm2N7XqG9KxkA1LwDFWd9dXxDVJgfZb0INDOpcA/SdomPzs4Fbis5RkJpDdFrEu6371c0r7AW1vq3lTSRm3WM+T9M5hI71T6Cul1OUj7YynwpKQtgX9umaVtG0fEA7mm0yStJ+mVpKv8SwAkTZPUk5+xLM6zPd/fsjrFwb/SEtJ9/BslPU0K/FtJBxuwYgf/ivTo/9MO1vZ+0kl0O+mF0ytZeQvjx6R3PTws6dE6C4uIm0jvtjiLFFo/4cVX7e2cQnqBsm9ZjwEHkNrpMdILcAdERK1aajgbWJ/0TosbgO/XnTEi/gD8Felq+jZJT5LCbS6wJCLuJD2Ifzkv/0DgwIh4tsbityc9o1hKek3jnIi4Po87DTgx35r7RJv5LyLdDniQtF9vaDNdHRcAU/L6rhrqQiLiaVL7bEW6x9/OhaR3uswhvSnhT8A/9LO8JaSLhMtJx+37qNwezc+qLwXuybVv0TL/6uyfOs4G9stB/VlgN9L5cDXpBeuqwfbpoaTjbAHpzRgnRcS1edw+pONvKfAl4L0RsayfZXRM3wuVVpOkC4EFEXFit2sxMxuKNe2DJGs0Sb3AO4BXDzKpmdkay7d6apL0OdKtny9GxL3drsfMbKh8q8fMrDC+4jczK8yIuMc/ceLE6O3t7XYZZmYjys033/xoRPS0Dh8Rwd/b28vcuXO7XYaZ2YgiqfVT9YBv9ZiZFcfBb2ZWGAe/mVlhHPxmZoVx8JuZFcbBb2ZWGAe/mVlhHPxmZoVx8JuZFWZEfHJ3dfROv3pF97zT9+9iJWZmawZf8ZuZFcbBb2ZWGAe/mVlhHPxmZoVx8JuZFcbBb2ZWGAe/mVlhHPxmZoVx8JuZFcbBb2ZWGAe/mVlhHPxmZoV
x8JuZFcbBb2ZWmMaCX9LWkq6XdLuk2yR9LA/fRNKPJN2V/2/cVA1mZvZiTV7xLweOj4gpwGuAj0iaAkwHrouI7YHrcr+ZmXVIY8EfEQ9FxK9y9xLgDmBL4GBgZp5sJnBIUzWYmdmLdeQev6Re4NXAjcCkiHgoj3oYmNSJGszMLGk8+CWNA74BfDwinqqOi4gAos18x0qaK2nuokWLmi7TzKwYjQa/pLVJof/ViPhmHvyIpM3z+M2Bhf3NGxHnR8TUiJja09PTZJlmZkVp8l09Ai4A7oiIMyujZgGH5+7DgW83VYOZmb3Y2AaX/TrgMOB3km7Jw04ATgcul3QUcB/w7gZrMDOzFo0Ff0T8DFCb0Xs3tV4zMxuYP7lrZlYYB7+ZWWEc/GZmhXHwm5kVxsFvZlYYB7+ZWWEc/GZmhXHwm5kVxsFvZlYYB7+ZWWEc/GZmhXHwm5kVxsFvZlYYB7+ZWWEc/GZmhXHwm5kVxsFvZlYYB7+ZWWEc/GZmhXHwm5kVxsFvZlYYB7+ZWWEc/GZmhXHwm5kVxsFvZlYYB7+ZWWEc/GZmhXHwm5kVxsFvZlYYB7+ZWWEc/GZmhXHwm5kVZmy3C+ik3ulXr+ied/r+XazEzKx7fMVvZlYYB7+ZWWEc/GZmhXHwm5kVxsFvZlaYxoJf0oWSFkq6tTLsZEkPSrol/+3X1PrNzKx/TV7xzwD26Wf4WRGxa/67psH1m5lZPxoL/oiYAzze1PLNzGxounGP/6OSfptvBW3chfWbmRWt08H/38B2wK7AQ8AZ7SaUdKykuZLmLlq0qEPlmZmNfh0N/oh4JCKei4jngf8B9hhg2vMjYmpETO3p6elckWZmo1xHg1/S5pXetwO3tpvWzMya0diXtEm6FNgLmChpPnASsJekXYEA5gEfaGr9ZmbWv8aCPyIO7WfwBU2tz8zM6vEnd83MCuPgNzMrjIPfzKwwDn4zs8LUCn5JuzRdiJmZdUbdK/5zJN0k6cOSNmq0IjMza1St4I+INwB/B2wN3Czpa5Le0mhlZmbWiNr3+CPiLuBE4FPAG4H/lPR7Se9oqjgzMxt+de/xv1LSWcAdwN8AB0bEK3L3WQ3WZ2Zmw6zuJ3e/DHwFOCEilvUNjIgFkk5spDIzM2tE3eDfH1gWEc8BSFoLWC8i/hgRFzdWnZmZDbu69/ivBdav9G+Qh5mZ2QhTN/jXi4ilfT25e4NmSjIzsybVDf6nJe3W1yPpL4FlA0xvZmZrqLr3+D8OXCFpASDgL4D3NFWUmZk1p1bwR8QvJe0IvDwPujMi/txcWWZm1pRV+SGW3YHePM9ukoiIixqpyszMGlMr+CVdDGwH3AI8lwcH4OA3Mxth6l7xTwWmREQ0WYyZmTWv7rt6biW9oGtmZiNc3Sv+icDtkm4CnukbGBEHNVKVmZk1pm7wn9xkEWZm1jl13875E0kvBbaPiGslbQCMabY0MzNrQt2vZT4GuBI4Lw/aEriqoZrMzKxBdV/c/QjwOuApWPGjLJs1VZSZmTWnbvA/ExHP9vVIGkt6H7+ZmY0wdYP/J5JOANbPv7V7BfCd5soyM7Om1A3+6cAi4HfAB4BrSL+/a2ZmI0zdd/U8D/xP/jMzsxGs7nf13Es/9/QjYtthr8jMzBq1Kt/V02c94F3AJsNfjpmZNa3WPf6IeKzy92BEnE36AXYzMxth6t7q2a3SuxbpGcCqfJe/mZmtIeqG9xmV7uXAPODdw16NmZk1ru67et7UdCFmZtYZdW/1HDfQ+Ig4c3jKMTOzpq3Ku3p2B2bl/gOBm4C7mijKzMyaUzf4twJ2i4glAJJOBq6OiGlNFWZmZs2o+5UNk4BnK/3P5mFmZjbC1L3ivwi4SdK3cv8hwMyBZpB0IXAAsDAids7DNgEuA3rJ7wyKiCdWuWozMxuyuh/g+jfgSOCJ/HdkRJw6yGwzgH1ahk0HrouI7YHrcr+ZmXVQ3Vs9ABsAT0XEl4D5krYZaOKImAM83jL4YFY+U5h
JeuZgZmYdVPenF08CPgV8Og9aG7hkCOubFBEP5e6HGeB1AknHSporae6iRYuGsCozM+tP3Sv+twMHAU8DRMQCYPzqrDgiggF+xSsizo+IqRExtaenZ3VWZWZmFXWD/9lqUEt6yRDX94ikzfMyNgcWDnE5ZmY2RHWD/3JJ5wETJB0DXMvQfpRlFnB47j4c+PYQlmFmZqth0LdzShLpLZg7Ak8BLwf+NSJ+NMh8lwJ7ARMlzQdOAk4nPYgcBdyHv+jNzKzjBg3+iAhJ10TELsCAYd8y36FtRu1ddxlmZjb86t7q+ZWk3RutxMzMOqLuJ3f3BKZJmkd6Z49ITwZe2VRhZmbWjAGDX9LkiLgfeFuH6jEzs4YNdsV/FelbOe+T9I2IeGcHajIzswYNdo9fle5tmyzEzMw6Y7DgjzbdZmY2Qg12q+dVkp4iXfmvn7th5Yu7GzZanZmZDbsBgz8ixnSqEDMz64xV+VpmMzMbBRz8ZmaFcfCbmRXGwW9mVhgHv5lZYRz8ZmaFcfCbmRXGwW9mVhgHv5lZYRz8ZmaFcfCbmRXGwW9mVhgHv5lZYRz8ZmaFcfCbmRXGwW9mVhgHv5lZYRz8ZmaFcfCbmRXGwW9mVhgHv5lZYRz8ZmaFcfCbmRXGwW9mVhgHv5lZYRz8ZmaFcfCbmRXGwW9mVhgHv5lZYRz8ZmaFGduNlUqaBywBngOWR8TUbtRhZlairgR/9qaIeLSL6zczK5Jv9ZiZFaZbV/wB/FBSAOdFxPmtE0g6FjgWYPLkyR0ub9X0Tr96Rfe80/fvYiVmZoPr1hX/6yNiN2Bf4COS/rp1gog4PyKmRsTUnp6ezldoZjZKdSX4I+LB/H8h8C1gj27UYWZWoo4Hv6SXSBrf1w28Fbi103WYmZWqG/f4JwHfktS3/q9FxPe7UIeZWZE6HvwRcQ/wqk6v18zMEr+d08ysMA5+M7PCOPjNzArTza9sGLGqH9gyMxtpfMVvZlYYB7+ZWWEc/GZmhXHwm5kVxsFvZlYYB7+ZWWEc/GZmhXHwm5kVxsFvZlYYf3K3RbtP5db9ScVV/RnGpqc3M2vlK34zs8I4+M3MCuPgNzMrjIPfzKwwDn4zs8I4+M3MCuPgNzMrjIPfzKwwxX6Aa1V/PnE4f25xTfjpRn8QzGxgo/kc8RW/mVlhHPxmZoVx8JuZFcbBb2ZWGAe/mVlhHPxmZoVx8JuZFcbBb2ZWmGI/wLWmq/Mhr3YfMFmdX/Wqo926hjJ/nZqGa9tWZzlNWd1ffOtGDWv6r8atzvqG68OVw1lDE23mK34zs8I4+M3MCuPgNzMrjIPfzKwwDn4zs8I4+M3MCtOV4Je0j6Q7Jd0taXo3ajAzK1XHg1/SGOC/gH2BKcChkqZ0ug4zs1J144p/D+DuiLgnIp4Fvg4c3IU6zMyKpIjo7AqlvwX2iYijc/9hwJ4R8dGW6Y4Fjs29LwfuHOIqJwKPDnHe0cJt4DYAtwGU1wYvjYie1oFr7Fc2RMT5wPmruxxJcyNi6jCUNGK5DdwG4DYAt0GfbtzqeRDYutK/VR5mZmYd0I3g/yWwvaRtJK0DvBeY1YU6zMyK1PFbPRGxXNJHgR8AY4ALI+K2Ble52reLRgG3gdsA3AbgNgC68OKumZl1lz+5a2ZWGAe/mVlhRk3wD/Y1EJLWlXRZHn+jpN4ulNmoGm1wnKTbJf1W0nWSXtqNOptU9+tAJL1TUkgadW/tq9MGkt6dj4XbJH2t0zU2rca5MFnS9ZJ+nc+H/bpRZ9dExIj/I71I/AdgW2Ad4DfAlJZpPgycm7vfC1zW7bq70AZvAjbI3R8qsQ3ydOOBOcANwNRu192F42B74NfAxrl/s27X3YU2OB/4UO6eAszrdt2d/BstV/x1vgbiYGBm7r4S2FuSOlhj0wZtg4i4PiL+mHtvIH2GYjSp+3UgnwO+APypk8V1SJ02OAb4r4h4AiAiFna4xqbVaYMANszdGwELOlhf142W4N8SeKDSPz8P63e
aiFgOPAls2pHqOqNOG1QdBXyv0Yo6b9A2kLQbsHVEDM+vaq956hwHOwA7SPq5pBsk7dOx6jqjThucDEyTNB+4BviHzpS2Zlhjv7LBmiNpGjAVeGO3a+kkSWsBZwJHdLmUbhtLut2zF+lZ3xxJu0TE4m4W1WGHAjMi4gxJfwVcLGnniHi+24V1wmi54q/zNRArppE0lvT07rGOVNcZtb4KQ9Kbgc8AB0XEMx2qrVMGa4PxwM7AbEnzgNcAs0bZC7x1joP5wKyI+HNE3Av8H+mBYLSo0wZHAZcDRMQvgPVIX+BWhNES/HW+BmIWcHju/lvgx5Ff2RklBm0DSa8GziOF/mi7rwuDtEFEPBkREyOiNyJ6Sa9zHBQRc7tTbiPqnAtXka72kTSRdOvnng7W2LQ6bXA/sDeApFeQgn9RR6vsolER/Pmefd/XQNwBXB4Rt0k6RdJBebILgE0l3Q0cB4yqX/6q2QZfBMYBV0i6RdKo+o6kmm0wqtVsgx8Aj0m6Hbge+OeIGDXPfmu2wfHAMZJ+A1wKHDHKLgQH5K9sMDMrzKi44jczs/oc/GZmhXHwm5kVxsFvZlYYB7+ZWWEc/GZmhXHwm5kV5v8B1GQG2ZdKs9sAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "invConstDF3.sort_values(by=['violation_ratio'],ascending=False)['violation_ratio'].plot.hist(bins=100).set_title(\"Symmetric Normal Constraint - Violation Ratios\")" ] }, { "cell_type": "markdown", "id": "working-stable", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 55, "id": "saved-twelve", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2e8a241c831b4968ae22d06c22c6e85e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/122 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for symmetric constraint checks\")" ] }, { "cell_type": "code", "execution_count": null, "id": "numerical-month", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "stuck-criticism", "metadata": {}, "source": [ "# Analysis on properties with constraints" ] }, { "cell_type": "code", "execution_count": 26, "id": "driven-reference", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-03 09:14:12 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " PARAS: ['P2302']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->()\" \\\n", " -o ../../constraintsOP/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", 
"execution_count": 39, "id": "exciting-focus", "metadata": {}, "outputs": [], "source": [ "!kgtk unique -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz --column node1 -o ../../constraintsOP/claims.propList.tsv" ] }, { "cell_type": "code", "execution_count": 42, "id": "flush-romania", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "node1\tlabel\tnode2\r\n", "P10\tcount\t17\r\n", "P1000\tcount\t10\r\n", "P1001\tcount\t26\r\n", "P1002\tcount\t9\r\n", "P1003\tcount\t20\r\n", "P1004\tcount\t33\r\n", "P1005\tcount\t21\r\n", "P1006\tcount\t26\r\n", "P1007\tcount\t19\r\n" ] } ], "source": [ "!head ../../constraintsOP/claims.propList.tsv" ] }, { "cell_type": "code", "execution_count": 43, "id": "chemical-harris", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "props = pd.read_csv(\"../../constraintsOP/claims.constraints_list.tsv\", sep='\\t')" ] }, { "cell_type": "code", "execution_count": 44, "id": "higher-underground", "metadata": {}, "outputs": [], "source": [ "props2 = props.groupby(['node1']).node2.apply(list)" ] }, { "cell_type": "code", "execution_count": 45, "id": "light-appreciation", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8100" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(props2)" ] }, { "cell_type": "code", "execution_count": 48, "id": "yellow-helmet", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2336, 8100)" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt = 0\n", "totalCnt = 0\n", "for prop in props2.index:\n", " totalCnt += 1\n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\"+ prop +\".tsv\")):\n", " continue\n", " else:\n", " cnt += 1\n", "cnt, totalCnt" ] }, { "cell_type": "code", "execution_count": 50, "id": "detected-skiing", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "node1\n", "P10 
[Q21502404, Q21510851, Q21510852, Q52004125, Q...\n", "P1000 [Q21510856, Q21510865, Q53869507]\n", "P1001 [Q21502838, Q21503250, Q21510865, Q25796498]\n", "P1002 [Q21503250, Q21510865]\n", "P1003 [Q19474404, Q21502404, Q21502410, Q21510851, Q...\n", " ... \n", "P1563 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1564 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1565 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1566 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "P1567 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "Name: node2, Length: 500, dtype: object" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "props2.head(500)" ] }, { "cell_type": "code", "execution_count": 32, "id": "processed-perfume", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "# Raw per-property rows from the kgtk unique output; kept under its own name so the\n", "# grouping cell below can be re-run without first re-reading the file.\n", "propCounts = pd.read_csv(\"../../constraintsOP/claims.propList.tsv\", sep='\\t')" ] }, { "cell_type": "code", "execution_count": 33, "id": "increasing-graphics", "metadata": {}, "outputs": [], "source": [ "# props2 is rebuilt from propCounts each time, so this cell is idempotent.\n", "props2 = propCounts.groupby('node1')['node2'].apply(list)" ] }, { "cell_type": "code", "execution_count": 34, "id": "posted-ukraine", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8193" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(props2)" ] }, { "cell_type": "code", "execution_count": 35, "id": "fifth-provision", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2415, 8193)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# cnt = properties that have a split claims file on disk; totalCnt = all properties in props2.\n", "hasClaimsFile = [\n", "    os.path.isfile(\"../../propertiesSplitWRemoved2/claims.\" + prop + \".tsv\")\n", "    for prop in props2.index\n", "]\n", "cnt, totalCnt = sum(hasClaimsFile), len(hasClaimsFile)\n", "cnt, totalCnt" ] }, { "cell_type": "code", "execution_count": 50, "id": "married-heating", "metadata": { "scrolled": true }, "outputs": [ { "data": { 
"text/plain": [ "node1\n", "P10 [Q21502404, Q21510851, Q21510852, Q52004125, Q...\n", "P1000 [Q21510856, Q21510865, Q53869507]\n", "P1001 [Q21502838, Q21503250, Q21510865, Q25796498]\n", "P1002 [Q21503250, Q21510865]\n", "P1003 [Q19474404, Q21502404, Q21502410, Q21510851, Q...\n", " ... \n", "P1563 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1564 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1565 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1566 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "P1567 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "Name: node2, Length: 500, dtype: object" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "props2.head(500)" ] }, { "cell_type": "code", "execution_count": null, "id": "magnetic-conditions", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "kgtkEnv", "language": "python", "name": "kgtkenv" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "318px" }, "toc_section_display": true, "toc_window_display": true }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "oldHeight": 122, "position": { "height": "40px", "left": 
"1170px", "right": "20px", "top": "120px", "width": "250px" }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "varInspector_section_display": "none", "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }