{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "honest-owner", "metadata": {}, "outputs": [], "source": [ "!kgtk cat -i ../../gdrive-kgtk-dump-2020-12-07/claims.tsv.gz \\\n", " ../../data/removed_statements.tsv \\\n", " -o ../../data/claims.w_removed_statements.tsv" ] }, { "cell_type": "code", "execution_count": 1, "id": "juvenile-ability", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "51f315ee082443c0910cd6f1584aafcc", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1225987894 [00:00(node2), c: (rLabel)-[:P2308]->(parent), d: (node1)-[]->(par), c: (eLabel)-[:P2303]->(eNode)\" \\\n", " --where 'nodeProp.label = rLabel and (par = parent or (rLabel = eLabel and node1 = eNode))' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2, max(parent) as `node1;ancestor`' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db ; \\\n", " kgtk --debug ifnotexists -i ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.all.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 4, "id": "abstract-retreat", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-03-12 22:33:26 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT graph_11_c1.\"id\", graph_11_c1.\"node1\", graph_11_c1.\"label\", graph_11_c1.\"node2\"\r\n", " FROM graph_11 AS graph_11_c1, graph_14 AS graph_14_c2\r\n", " WHERE graph_11_c1.\"node1\"=graph_14_c2.\"node1\"\r\n", " 
AND (graph_14_c2.\"node2\" IN (?, ?, ?))\r\n", " PARAS: ['Q1238720', 'Q3331189', 'Q47461344']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../propertiesSplitWRemoved/claims.P996.tsv \\\n", " ../../wikidata-20210215/derived.isastar.tsv.gz \\\n", " --match \"m: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)\" \\\n", " --where 'par in [\"Q1238720\",\"Q3331189\",\"Q47461344\"]' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db ; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.P996.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 7, "id": "strange-truck", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "81289 ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv\r\n" ] } ], "source": [ "!wc -l ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P996.correct.tsv" ] }, { "cell_type": "code", "execution_count": 8, "id": "finnish-hampton", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-03-12 22:49:54 sqlstore]: IMPORT graph directly into table graph_15 from /data/wd-correctness/propertiesSplit/claims.P991.tsv ...\n", "[2021-03-12 22:49:54 query]: SQL Translation:\n", "---------------------------------------------\n", " SELECT graph_15_c1.\"id\", graph_15_c1.\"node1\", graph_15_c1.\"label\", graph_15_c1.\"node2\"\n", " FROM graph_15 AS 
graph_15_c1, graph_5 AS graph_5_c2\n", " WHERE graph_15_c1.\"node1\"=graph_5_c2.\"node1\"\n", " AND (graph_5_c2.\"node2\" IN (?))\n", " PARAS: ['Q40231']\n", "---------------------------------------------\n", "[2021-03-12 22:49:55 sqlstore]: CREATE INDEX on table graph_15 column node1 ...\n", "[2021-03-12 22:49:55 sqlstore]: ANALYZE INDEX on table graph_15 column node1 ...\n" ] } ], "source": [ "!kgtk --debug query -i ../../propertiesSplitWRemoved/claims.P991.tsv \\\n", " ../../wikidata-20210215/derived.P31P279star.tsv.gz \\\n", " --match \"m: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)\" \\\n", " --where 'par in [\"Q40231\"]' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P991.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db ; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.P991.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P991.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P991.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 9, "id": "elegant-reverse", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-03-12 23:54:56 sqlstore]: IMPORT graph directly into table graph_16 from /data/wd-correctness/propertiesSplit/claims.P965.tsv ...\n", "[2021-03-12 23:54:56 query]: SQL Translation:\n", "---------------------------------------------\n", " SELECT graph_16_c1.\"id\", graph_5_c2.\"node1\", graph_16_c1.\"label\", graph_16_c1.\"node2\"\n", " FROM graph_16 AS graph_16_c1, graph_5 AS graph_5_c2\n", " WHERE graph_16_c1.\"node1\"=graph_5_c2.\"node1\"\n", " AND (graph_5_c2.\"node2\" IN (?))\n", " PARAS: ['Q6023295']\n", 
"---------------------------------------------\n", "[2021-03-12 23:54:56 sqlstore]: CREATE INDEX on table graph_16 column node1 ...\n", "[2021-03-12 23:54:56 sqlstore]: ANALYZE INDEX on table graph_16 column node1 ...\n" ] } ], "source": [ "!kgtk --debug query -i ../../propertiesSplitWRemoved/claims.P965.tsv \\\n", " ../../wikidata-20210215/derived.P31P279star.tsv.gz \\\n", " --match \"m: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)\" \\\n", " --where 'par in [\"Q6023295\"]' \\\n", " --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P965.correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/mandConst001.sqlite3.db ; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.P965.tsv \\\n", " --filter-on ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P965.correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../constraintsOP/typeConstraint/claims.type-constraints.mandatory.instanceOf.P965.incorrect.tsv" ] }, { "cell_type": "markdown", "id": "matched-strength", "metadata": {}, "source": [ "# Generate Queries" ] }, { "cell_type": "markdown", "id": "black-insured", "metadata": {}, "source": [ "## Type Constraint" ] }, { "cell_type": "markdown", "id": "interior-humor", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 5, "id": "clinical-brunei", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.read_csv('../../constraintsOP/typeConstraint/claims.type-constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 6, "id": "assured-cleaners", "metadata": {}, "outputs": [], "source": [ "df1 = df.groupby(['node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 7, "id": "sharing-evolution", "metadata": 
{}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
0P1001P2308[Q102496, Q105985, Q1140371, Q1151067, Q119768...
1P1001P2309[Q30208840]
2P1002P2308[Q630010]
3P1002P2309[Q21514624]
4P1004P2308[Q2221906, Q23413, Q3947, Q41176, Q88291]
\n", "
" ], "text/plain": [ " node1 label node2\n", "0 P1001 P2308 [Q102496, Q105985, Q1140371, Q1151067, Q119768...\n", "1 P1001 P2309 [Q30208840]\n", "2 P1002 P2308 [Q630010]\n", "3 P1002 P2309 [Q21514624]\n", "4 P1004 P2308 [Q2221906, Q23413, Q3947, Q41176, Q88291]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.head()" ] }, { "cell_type": "code", "execution_count": 8, "id": "still-trail", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
69P105P2308[Q16521]
70P105P2309[Q21503252]
71P105P2316[Q21502408]
\n", "
" ], "text/plain": [ " node1 label node2\n", "69 P105 P2308 [Q16521]\n", "70 P105 P2309 [Q21503252]\n", "71 P105 P2316 [Q21502408]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1[df1['node1'] == 'P105']" ] }, { "cell_type": "markdown", "id": "solid-browser", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 17, "id": "bright-impossible", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8774a89ab86f413f9a434b082221d163", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4810 [00:00(node2), \" + parentFile + \": (node1)-[]->(nodex), P279star: (nodex)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/const120_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\")\n", "\n", " cnt += 1\n", " except:\n", " print(\"Something failed for prop:\",prop)\n", "\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 18, "id": "electrical-agreement", 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "3743" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 19, "id": "outside-stupid", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,33):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved/checkViolations/typeConstraintValidator\"+str(i)+\".sh\")\n", " " ] }, { "cell_type": "markdown", "id": "competitive-canvas", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 1, "id": "casual-perth", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d3adf3ce540a4ba5bff69b56a357277e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c5d144cbeb1d49598b7a30efc4bfb7af", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1224 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P137074[../../allConstraintsAnalysisWRemoved/typeCons...0.363636
P6686272[../../allConstraintsAnalysisWRemoved/typeCons...0.068966
P378543[../../allConstraintsAnalysisWRemoved/typeCons...0.428571
P233610[../../allConstraintsAnalysisWRemoved/typeCons...0.000000
P4602201813[../../allConstraintsAnalysisWRemoved/typeCons...0.006401
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P1370 7 4 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P6686 27 2 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P3785 4 3 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P2336 1 0 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P4602 2018 13 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "\n", " violation_ratio \n", "P1370 0.363636 \n", "P6686 0.068966 \n", "P3785 0.428571 \n", "P2336 0.000000 \n", "P4602 0.006401 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF.head()" ] }, { "cell_type": "code", "execution_count": 7, "id": "competitive-peeing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P889401[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P486601[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P508301[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P403102[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P385504[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P176002[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P688003[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P396801[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P508601[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P417202[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P6128033[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P421702[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P563304[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P529801[../../allConstraintsAnalysisWRemoved/typeCons...1.0
P670003[../../allConstraintsAnalysisWRemoved/typeCons...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P8894 0 1 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P4866 0 1 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P5083 0 1 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P4031 0 2 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P3855 0 4 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P1760 0 2 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P6880 0 3 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P3968 0 1 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P5086 0 1 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P4172 0 2 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P6128 0 33 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P4217 0 2 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P5633 0 4 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P5298 0 1 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P6700 0 3 [../../allConstraintsAnalysisWRemoved/typeCons... 
\n", "\n", " violation_ratio \n", "P8894 1.0 \n", "P4866 1.0 \n", "P5083 1.0 \n", "P4031 1.0 \n", "P3855 1.0 \n", "P1760 1.0 \n", "P6880 1.0 \n", "P3968 1.0 \n", "P5086 1.0 \n", "P4172 1.0 \n", "P6128 1.0 \n", "P4217 1.0 \n", "P5633 1.0 \n", "P5298 1.0 \n", "P6700 1.0 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 8, "id": "backed-corruption", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../../allConstraintsAnalysisWRemoved/typeConstraint_Final/normal/claims.type-constraints.instanceOf.P4945.correct.tsv',\n", " '../../allConstraintsAnalysisWRemoved/typeConstraint_Final/normal/claims.type-constraints.instanceOf.P4945.incorrect.tsv']" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(typeConstDF.loc['P4945'].paths)" ] }, { "cell_type": "code", "execution_count": 11, "id": "clinical-lawsuit", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 3743.000000\n", "mean 0.367622\n", "std 0.367966\n", "min 0.000000\n", "25% 0.016360\n", "50% 0.240000\n", "75% 0.700000\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 12, "id": "wanted-domestic", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Type Constraint Violation Ratios')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "typeConstDF['violation_ratio'].plot.hist(bins=100).set_title(\"Type Constraint Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 13, "id": "sufficient-hollywood", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Type Constraint Violation Ratios (<=0.05)')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEICAYAAACwDehOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAbXklEQVR4nO3deZhdVZ3u8e8rIQRkCJBAQ0IoEJShkcGIoJduhIsyyHRFhg5D89BErtiPtnIREDWIjaAtg3odmC5hUCa7NQJKM9toMySMAiJBgiRhCCEDYR5+94+1arFzqFM5VafOOVWV9/M856m9115777XOsN89nVOKCMzMzADe0+kGmJnZ4OFQMDOzwqFgZmaFQ8HMzAqHgpmZFQ4FMzMrHAo2qEk6SdL5bVzfQ5J2bqDezpJmN7Gen0j6Wn/nbxdJO0l6tAXLHSvpT5JWHuhlt5Okf5Z0RqfbMZAcCm0gaUnl8bakVyrjk9rUhvUkXSDpaUkv5g/kKZLe28J13irpn5pZRkScFhENLUPSFEmX9jL9t5K+2UP5vpKekTQiIraMiFubaHJP6/1HSbdXyyLimIg4dSDXk9c1RdIb+b21UNIfJO3Yh/lD0iaVdv5XRHxgoNsJnABcFBGvtGDZSFpJ0oWSFufX9kvLqP8vud7iPN9KlWmzaj6z/1mZ9TxgkqR1WtGPTnAotEFErNr9AP4K7F0pu6zV65e0FvDfwMrAjhGxGrAbMBp4X6vX30u7RrR5lVOBQyWppvww4LKIeLPN7WmVK/J7bQxwC3BVh9uzlLzBPQKoG+A19dftx2qmAJsCGwIfB46XtHud5X+SFFK75vobA6fUVKt+Zj/RXRgRrwK/AQ7vRxsHp4jwo40PYBbwP4GRwAvAVpVp6wAvA2OBnYHZwEnA83m+SZW6KwH/RgqZZ4GfACvXWee3gAeB9/TSro8CdwOL8t+PVqbdCpwK/B54EfhPYEyeNor04Z4PLMzzrgv8K/AW8CqwBPhhrh/AscBjwBO57BzgKWAxMAPYqbLuKcClebgrz39E7vfzwFfztN2B14E38vru76GPK+f+/V2lbM3cxq2rr0/lOT4bmJsfZwMr5Wk7A7MryzkBeDw/Pw8D++fyzfPy38rtWpjLLwK+VZn/aGBmfk9MA9avTAvgmPycLQT+L6A6r2N5vvL4Fnn+sXl8e9IOwkLgaeCHwMg87Xe57ku5rQf10M/NSe+HhcBDwD6VaXvmvr8IzAGOq9PGvwNmLuNzsgoprG8GHu7H52wu8InK+KnA5XXq/gw4rTK+K/BM7We2l3VNAm7p1DZloB8db8Dy9qjZ6PwIOKMy7QvAr/PwzsCbwJl54/T3+cP6gTz9rLzxWAtYDfg18O0667wDOKWXNq0FLMgfwhHAIXl87Tz9VtIG7/2kDeutwOl52mfzulcBVgA+BKxeme+fatYVwA15nSvnskOBtfO6vww8A4zK06bw7lA4L7dja+A1YPPaur309Tzg/Mr
4Z4H76rw+38zP3TqkoP4DcGrl9aluLD8DrE86+j4ov1br5Wn/CNxe046LyKEA7EIKuO3ya/0D4Hc1z9k1pCO7CcA8YPc6/as+XyOB0/OyR+SyDwE75Oe6C3gE+GLNujapjJd+AiuSguukvOxdSAHQ/Z58mhzopLDdrk4bjwWurTNtx/waLcjvk0lUdnZIn5mFdR4PVNYdwLqV+Q4AHqyzzvuBgyrjY/L83e//WaQdr3mkHaKta+bfDnih09uWgXr49FFnTQUOqZzOOAy4pKbO1yLitYi4DbgWODDXnwz8S0S8EBEvAqcBB9dZz9qkD2w9ewGPRcQlEfFmRPwc+BOwd6XO/4uIP0c6B3wlsE0ufyMvf5OIeCsiZkTE4mX0+9u53a8ARMSlETE/r/t7pA1jb+exT4mIVyLiftIHeutlrK9qKnCApFF5/PBc1pNJwDcj4rmImEc6pXBYTxUj4qqImBsRb0fEFaS9+u0bbNMk4MKIuCciXgNOBHaU1FWpc3pELIyIv5JOCW3Ty/IOlLQQeIV0BHJA5FNj+fW5Iz/Xs4CfknY4GrEDsGpuy+sRcTMprA7J098AtpC0ekQsiIh76ixnNClMCkkHSvoTKSyfIB1B7xYRl0XlukNEfC4iRtd5fDBXWzX/XVRZxSLSzlNPVu2hLpX6k0gBuiHpub9e0uhK/ReBNeose8hxKHRQRNxJOl20s6TNgE1Ie//dFkTES5XxJ0l7o2NJe+Yz8sXEhcBvc3lP5gPr9dKU9fOyq54ExlXGn6kMv8w7H7xLgOuByyXNlfQdSSv2si5Ip4oKScdJekTSotyXNUh7a/XUa8syRcTtpD3n/SS9j7Th/lmd6rXPS/fz/y6SDpd0X+X1+Ft670Pd9UTEEtJr1sjz35MrI2I06TTeH0lHB93tfL+ka7ovqpJ2JvrSzqci4u1KWfV98mnSKaQnJd3WywXuBbx7Az0+L+c+UtA/Q/8tyX9Xr5StTk0Q1dSvrUt3/Yj4fd4JeTkivk06KtmpUn81lg6VIc2h0HlTSadPDgOujnThqtuaNXcHTSCdK32etBe4ZWUvaY1IFxd7ciOwv6R6r/dc0l5Q1QTSeeFeRcQbEXFKRGxBui7xKd656FbvJ3hLuaSdgOOBA4E188ZsEVB7MbgRjf7k78W5jYcC10fEs3Xq1T4v3c//UiRtSDrl8XnSKYfRpI1xdx+W1a6l1pNf87Vp4PnvTUQ8TzqinCKpe6fgx6SjwE0jYnXSqaBGn+u5wAY176PyPomIuyNiX9Lptl+Sjih78gDpVGS1rWeSQuEm4KvAbElnSdq2Wi/fyrukzuOhvKwFpCPj6hHk1qRrID15qIe6z0bE/Dr1g6Wfs81JQTYsOBQ671Jgf9IG6uIepp8iaWTeeH4KuCrvqZ0HnNV9K5ykcfkuip6cSdr7mZo3YN31z5T0QeA64P2S/kHSCEkHkS5QXrOsxkv6uKStJK1AulD8BtC9J/ks6U6O3qxGunYyDxgh6essvdfWF88CXb2EX7eLSRf7j6b+qSOAnwMnK91TPwb4Oj3fMfNe0oZiHoCkI0lHCtV2jZc0spf1HClpm3xnzmnAnfn0TlMi4lHSkdzxuWg10uu0JB+d/u+aWXp7zbqPbI+XtKLS9zn2Jh0ljpQ0SdIaEfFGXsfbdZZzFzBaUvVIiIhYHBHnRsRHSae0XgV+LemmSp1jonI3X81jy8riLia9dmvmfh5NOjXVk4uBoyRtkU8LndxdV9IESR/L/Rsl6f+Qjqx+X5n/70l3IA0LDoUOi4ingHtIG5X/qpn8DOlQey5wGXBMRPwpT/sK6aLfHfk0wI3UOQ8fES+Q9uLfAO6U9CJpj2wR6S6Q+aTA+TLptMXxwKfynuay/A1wNWkj8AhwG+9cFzmHdP5+gaTv15n/etKprz+TTkW8Ss3ppT7ovvVyvqR657PJG9s/kDbm0+rVI921NZ20Z/sg6XX6Vg/Lexj4HumunmeBrVh6o3EzaW/0GUn
vek4j4kbga8AvSHu476P+9aH++C4wOe9AHAf8A+nUyHnAFTV1p5B2HhZKOrCmna+TQmAP0tHqj4DDK+/Jw4BZ+f14DOlc/Lvk5VxE2hHqUUQ8GhEnko5ETm64p+/4BunmiCdJ78nvRsRvoWzol0iakNf1W+A7pOsFf83zfCMvZzXS0dUC0hHR7sAe3UcR+drUnvS+czGkKKLRI25rFUkXAnMj4uRK2c6ku0jGd6pdZq0iaSxpJ2jbaNEX2NpB0j8DG0TE8cusPES0+8tDViPfYfK/gG2XUdVs2Mh3c23W6XY0KyJ+0Ok2DDSfPuogSaeSLkh+NyKe6HR7zMx8+sjMzAofKZiZWTGkrymMGTMmurq6Ot0MM7MhZcaMGc9HRI9fdh3SodDV1cX06dM73QwzsyFFUu0vGBQ+fWRmZoVDwczMCoeCmZkVDgUzMyscCmZmVjgUzMyscCiYmVnhUDAzs8KhYGZmxZD+RnMzuk64tgzPOn2vDrbEzGzw8JGCmZkVDgUzMyscCmZmVjgUzMyscCiYmVnhUDAzs8KhYGZmhUPBzMwKh4KZmRUOBTMzKxwKZmZWOBTMzKxwKJiZWeFQMDOzwqFgZmaFQ8HMzAqHgpmZFQ4FMzMrHApmZlY4FMzMrHAomJlZ4VAwM7PCoWBmZoVDwczMCoeCmZkVLQ8FSStIulfSNXl8I0l3Spop6QpJI3P5Snl8Zp7e1eq2mZnZ0tpxpPAF4JHK+BnAWRGxCbAAOCqXHwUsyOVn5XpmZtZGLQ0FSeOBvYDz87iAXYCrc5WpwH55eN88Tp6+a65vZmZt0uojhbOB44G38/jawMKIeDOPzwbG5eFxwFMAefqiXH8pkiZLmi5p+rx581rYdDOz5U/LQkHSp4DnImLGQC43Is6NiIkRMXHs2LEDuWgzs+XeiBYu+2PAPpL2BEYBqwPnAKMljchHA+OBObn+HGADYLakEcAawPwWts/MzGq07EghIk6MiPER0QUcDNwcEZOAW4ADcrUjgF/l4Wl5nDz95oiIVrXPzMzerRPfU/gK8CVJM0nXDC7I5RcAa+fyLwEndKBtZmbLtVaePioi4lbg1jz8F2D7Huq8CnymHe0xM7Oe+RvNZmZWOBTMzKxwKJiZWeFQMDOzwqFgZmaFQ8HMzAqHgpmZFQ4FMzMrHApmZlY4FMzMrHAomJlZ4VAwM7PCoWBmZoVDwczMCoeCmZkVDgUzMyscCmZmVjgUzMyscCiYmVnhUDAzs8KhYGZmhUPBzMwKh4KZmRUOBTMzKxwKZmZWOBTMzKxwKJiZWeFQMDOzwqFgZmaFQ8HMzAqHgpmZFQ4FMzMrHApmZlY4FMzMrHAomJlZ4VAwM7OiZaEgaZSkuyTdL+khSafk8o0k3SlppqQrJI3M5Svl8Zl5eler2mZmZj1r5ZHCa8AuEbE1sA2wu6QdgDOAsyJiE2ABcFSufxSwIJefleuZmVkbtSwUIlmSR1fMjwB2Aa7O5VOB/fLwvnmcPH1XSWpV+8zM7N1aek1B0gqS7gOeA24AHgcWRsSbucpsYFweHgc8BZCnLwLW7mGZkyVNlzR93rx5rWy+mdlyp6FQkLRVfxYeEW9FxDbAeGB7YLP+LKdmmedGxMSImDh27NhmF2dmZhWNHin8KF80/pykNfq6kohYCNwC7AiMljQiTxoPzMnDc4ANAPL0NYD5fV2XmZn1X0OhEBE7AZNIG+0Zkn4mabfe5pE0VtLoPLwysBvwCCkcDsjVjgB+lYen5XHy9JsjIhrvipmZNWvEsqskEfGYpJOB6cD3gW3zheCTIuLfe5hlPWCqpBVI4XNlRFwj6WHgcknfAu4FLsj1LwAukTQTeAE4uN+9MjOzfmkoFCR9EDgS2It0wXjviLhH0vrAfwPvCoWIeADYtofyv5CuL9SWvwp8pk+tNzOzAdXokcIPgPNJRwWvdBdGxNx89GBmZsNAo6GwF/BKRLwFIOk9wKiIeDkiLmlZ68zMrK0avfvoRmD
lyvgquczMzIaRRkNhVOXbyeThVVrTJDMz65RGQ+ElSdt1j0j6EPBKL/XNzGwIavSawheBqyTNBQT8DXBQqxplZmad0VAoRMTdkjYDPpCLHo2IN1rXLDMz64SGv7wGfBjoyvNsJ4mIuLglrTIzs45o9MtrlwDvA+4D3srFATgUzMyGkUaPFCYCW/i3iMzMhrdG7z76I+nispmZDWONHimMAR6WdBfp32wCEBH7tKRVZmbWEY2GwpRWNsLMzAaHRm9JvU3ShsCmEXGjpFWAFVrbNDMza7dG/x3n0cDVwE9z0Tjgly1qk5mZdUijF5qPBT4GLIb0D3eAdVrVKDMz64xGQ+G1iHi9eyT/D2XfnmpmNsw0Ggq3SToJWDn/b+argF+3rllmZtYJjYbCCcA84EHgs8B1gP/jmpnZMNPo3UdvA+flh5mZDVON/vbRE/RwDSEiNh7wFpmZWcf05bePuo0CPgOsNfDNMTOzTmromkJEzK885kTE2cBerW2amZm1W6Onj7arjL6HdOTQl//FYGZmQ0CjG/bvVYbfBGYBBw54a8zMrKMavfvo461uiJmZdV6jp4++1Nv0iDhzYJpjZmad1Je7jz4MTMvjewN3AY+1olFmZtYZjYbCeGC7iHgRQNIU4NqIOLRVDTMzs/Zr9Gcu1gVer4y/nsvMzGwYafRI4WLgLkn/kcf3A6a2pEVmZtYxjd599K+SfgPslIuOjIh7W9csMzPrhEZPHwGsAiyOiHOA2ZI2alGbzMysQxr9d5zfAL4CnJiLVgQubVWjzMysMxo9Utgf2Ad4CSAi5gKrtapRZmbWGY2GwusREeSfz5b03mXNIGkDSbdIeljSQ5K+kMvXknSDpMfy3zVzuSR9X9JMSQ/U/N6SmZm1QaOhcKWknwKjJR0N3Miy/+HOm8CXI2ILYAfgWElbkP6L200RsSlwUx4H2APYND8mAz/uU0/MzKxpy7z7SJKAK4DNgMXAB4CvR8QNvc0XEU8DT+fhFyU9AowD9gV2ztWmAreSrlfsC1ycj0jukDRa0np5OWZm1gbLDIWICEnXRcRWQK9BUI+kLmBb4E5g3cqG/hne+RLcOOCpymyzc9lSoSBpMulIggkTJvSnOWZmVkejp4/ukfTh/qxA0qrAL4AvRsTi6rTqdYpGRcS5ETExIiaOHTu2P00yM7M6Gv1G80eAQyXNIt2BJNI2/YO9zSRpRVIgXBYR/56Ln+0+LSRpPeC5XD4H2KAy+/hcZmZmbdJrKEiaEBF/BT7Z1wXnaxEXAI/U/LT2NOAI4PT891eV8s9LupwUQot8PcHMrL2WdaTwS9Kvoz4p6RcR8ek+LPtjwGHAg5Luy2UnkcLgSklHAU/yzn9wuw7YE5gJvAwc2Yd1mZnZAFhWKKgyvHFfFhwRt9fMX7VrD/UDOLYv6zAzs4G1rAvNUWfYzMyGoWUdKWwtaTFpj3/lPAzvXGhevaWtMzOztuo1FCJihXY1xMzMOq8vP51tZmbDnEPBzMwKh4KZmRUOBTMzKxwKZmZWOBTMzKxwKJiZWeFQMDOzwqFgZmaFQ8HMzAqHgpmZFQ4FMzMrHApmZlY4FMzMrHAomJlZ4VAwM7PCoWBmZoVDwczMCoeCmZkVDgUzMyscCmZmVjgUzMyscCiYmVnhUDAzs8KhYGZmhUPBzMwKh4KZmRUOBTMzKxwKZmZWOBTMzKxwKJiZWeFQMDOzomWhIOlCSc9J+mOlbC1JN0h6LP9dM5dL0vclzZT0gKTtWtUuMzOrr5VHChcBu9eUnQDcFBGbAjflcYA9gE3zYzLw4xa2y8zM6mhZKETE74AXaor3Babm4anAfpXyiyO5Axgtab1Wtc3MzHrW7msK60bE03n4GWDdPDwOeKpSb3YuexdJkyVNlzR93rx5rWupmdlyqGMXmiMigOjHfOdGxMSImDh27NgWtMzMbPnV7lB4tvu0UP77XC6fA2xQqTc+l5mZWRu1OxSmAUfk4SOAX1XKD893Ie0ALKq
cZjIzszYZ0aoFS/o5sDMwRtJs4BvA6cCVko4CngQOzNWvA/YEZgIvA0e2ql1mZlZfy0IhIg6pM2nXHuoGcGyr2mJmZo3xN5rNzKxwKJiZWeFQMDOzwqFgZmaFQ8HMzAqHgpmZFQ4FMzMrHApmZlY4FMzMrHAomJlZ4VAwM7PCoWBmZoVDwczMCoeCmZkVDgUzMyscCmZmVjgUzMyscCiYmVnhUDAzs8KhYGZmhUPBzMwKh4KZmRUOBTMzKxwKZmZWOBTMzKxwKJiZWeFQMDOzwqFgZmbFiE43wIaGrhOuLcOzTt+rgy0xs1bykYKZmRU+UmB47AUPhz6YWec5FGpUN669qW54+7pBbqR+vTrt3Pg3+lyY2fDhUOin4bTBbEWoNTv/YAhFs+WRQ2GA9TUsGqk/UAHUinX1Vr/exrwV2h1sfeUws6HCoTCE1dvQDqWjmL4eNfR1mVWDcWPssLDBxqFgLdPOo4OBqj9YTm01E2yDLWgGW3usd4MqFCTtDpwDrACcHxGnd7hJ1kadOsJp5ohrKB2V1dPOU2+1z9dAhUSrg2eglj8UjmIVEZ1uAwCSVgD+DOwGzAbuBg6JiIfrzTNx4sSYPn16v9Y3HD7MZn3RzDWeRuYdqNN/zbajUwbq+a2dvxWBIWlGREzscdogCoUdgSkR8ck8fiJARHy73jwOBTNbXjUTFr2FwmA6fTQOeKoyPhv4SG0lSZOByXl0iaRH+7m+McDz/Zx3qHKflw/u83JAZzTV5w3rTRhModCQiDgXOLfZ5UiaXi8phyv3efngPi8fWtXnwfTbR3OADSrj43OZmZm1yWAKhbuBTSVtJGkkcDAwrcNtMjNbrgya00cR8aakzwPXk25JvTAiHmrhKps+BTUEuc/LB/d5+dCSPg+au4/MzKzzBtPpIzMz6zCHgpmZFcMyFCTtLulRSTMlndDD9JUkXZGn3ympqzLtxFz+qKRPtrXhTehvnyWtLekWSUsk/bDtDW9CE33eTdIMSQ/mv7u0vfH91ESft5d0X37cL2n/tje+H5r5LOfpE/J7+7i2NbpJTbzGXZJeqbzOP+lXAyJiWD1IF6kfBzYGRgL3A1vU1Pkc8JM8fDBwRR7eItdfCdgoL2eFTvepxX1+L/A/gGOAH3a6L23q87bA+nn4b4E5ne5PG/q8CjAiD68HPNc9PlgfzfS3Mv1q4CrguE73pw2vcRfwx2bbMByPFLYHZkbEXyLideByYN+aOvsCU/Pw1cCukpTLL4+I1yLiCWBmXt5g1+8+R8RLEXE78Gr7mjsgmunzvRExN5c/BKwsaaW2tLo5zfT55Yh4M5ePAobCHSbNfJaRtB/wBOk1Hiqa6vNAGI6h0NPPZYyrVyd/UBYBazc472DUTJ+HqoHq86eBeyLitRa1cyA11WdJH5H0EPAgcEwlJAarfvdX0qrAV4BT2tDOgdTs+3ojSfdKuk3STv1pwKD5noJZu0naEjgD+ESn29IOEXEnsKWkzYGpkn4TEUPtCLFRU4CzImLJAO5ED3ZPAxMiYr6kDwG/lLRlRCzuy0KG45FCIz+XUepIGgGsAcxvcN7BqJk+D1VN9VnSeOA/gMMj4vGWt3ZgDMjrHBGPAEtI11MGs2b6+xHgO5JmAV8ETspfjh3s+t3nfNp7PkBEzCBdm3h/XxswHEOhkZ/LmAYckYcPAG6OdKVmGnBwvrq/EbApcFeb2t2MZvo8VPW7z5JGA9cCJ0TE79vV4AHQTJ83yhsQJG0IbAbMak+z+63f/Y2InSKiKyK6gLOB0yJiKNxd18xrPFbp/9IgaWPS9usvfW5Bp6+2t+IB7En6hz2PA1/NZd8E9snDo0h3JMwkbfQ3rsz71Tzfo8Aene5Lm/o8C3iBtPc4m5q7HQbro799Bk4GXgLuqzzW6XR/Wtznw0gXXO8D7gH263RfWtnfmmVMYYjcfdTka/zpmtd47/6s3z9zYWZmxXA8fWRmZv3kUDAzs8KhYGZmhUPBzMwKh4KZmRU
OBTMzKxwKZmZW/H9rPttkmmL4cAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "typeConstDF[typeConstDF['violation_ratio'] <= 0.05].violation_ratio.plot.hist(bins=100).set_title(\"Type Constraint Violation Ratios (<=0.05)\")" ] }, { "cell_type": "code", "execution_count": null, "id": "minor-marshall", "metadata": {}, "outputs": [], "source": [ "# Compare against the mean computed from the data itself. The previous hard-coded\n", "# threshold (5.286054) lies above the maximum possible ratio of 1.0, so the count was always 0.\n", "mean_violation_ratio = typeConstDF['violation_ratio'].mean()\n", "n_above_mean = int((typeConstDF['violation_ratio'] > mean_violation_ratio).sum())\n", "print(f\"No. of constraints whose violation ratio is greater than mean :{n_above_mean}/{len(typeConstDF)}\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "special-consensus", "metadata": {}, "outputs": [], "source": [ "# typeConstDF.sort_values(by=['incorrect'],ascending=False).head(5).paths.values" ] }, { "cell_type": "code", "execution_count": 16, "id": "excited-person", "metadata": {}, "outputs": [], "source": [ "# !cat ../../allConstraintsAnalysisWRemoved/typeConstraint/normal/claims.type-constraints.instanceOf.P953.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 17, "id": "revolutionary-violence", "metadata": {}, "outputs": [], "source": [ "# Roll the per-relation counts up into totals for each constraint type\n", "# and derive the overall violation ratio (VR) = incorrect / (correct + incorrect).\n", "relationKinds = ('instanceOf', 'subclass', 'instanceOfOrSubclass')\n", "for stats in typeConstViolations.values():\n", "    stats['correct'] = sum(stats[kind]['correct'] for kind in relationKinds)\n", "    stats['incorrect'] = sum(stats[kind]['incorrect'] for kind in relationKinds)\n", "    checked = stats['correct'] + stats['incorrect']\n", "    # A type with no checked statements has an undefined ratio; use NaN rather than raising ZeroDivisionError.\n", "    stats['VR'] = stats['incorrect'] / checked if checked else float('nan')" ] }, { "cell_type": "code", "execution_count": 18, "id": "emotional-favorite", "metadata": { "scrolled": true }, "outputs": [ { "data": { 
"text/plain": [ "{'mandatory': {'instanceOf': {'correct': 46445731, 'incorrect': 865083},\n", " 'subclass': {'correct': 2067, 'incorrect': 70},\n", " 'instanceOfOrSubclass': {'correct': 1735862, 'incorrect': 13873},\n", " 'propCount': 612,\n", " 'correct': 48183660,\n", " 'incorrect': 879026,\n", " 'VR': 0.017916385580683456},\n", " 'suggestion': {'instanceOf': {'correct': 74911, 'incorrect': 24201},\n", " 'subclass': {'correct': 0, 'incorrect': 0},\n", " 'instanceOfOrSubclass': {'correct': 24245, 'incorrect': 3485},\n", " 'propCount': 29,\n", " 'correct': 99156,\n", " 'incorrect': 27686,\n", " 'VR': 0.21827155043282193},\n", " 'normal': {'instanceOf': {'correct': 426732789, 'incorrect': 5965156},\n", " 'subclass': {'correct': 108191, 'incorrect': 17963},\n", " 'instanceOfOrSubclass': {'correct': 68863493, 'incorrect': 996342},\n", " 'propCount': 3102,\n", " 'correct': 495704473,\n", " 'incorrect': 6979461,\n", " 'VR': 0.01388439241426005}}" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstViolations" ] }, { "cell_type": "code", "execution_count": 19, "id": "aggregate-impact", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratiototal
P2093148843205927027[../../allConstraintsAnalysisWRemoved/typeCons...0.006190149770232
P147644059166208472[../../allConstraintsAnalysisWRemoved/typeCons...0.00470944267638
P57739990807165864[../../allConstraintsAnalysisWRemoved/typeCons...0.00413040156671
P143337028672112955[../../allConstraintsAnalysisWRemoved/typeCons...0.00304137141627
P121533425605316565[../../allConstraintsAnalysisWRemoved/typeCons...0.00938233742170
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2093 148843205 927027 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P1476 44059166 208472 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P577 39990807 165864 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P1433 37028672 112955 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "P1215 33425605 316565 [../../allConstraintsAnalysisWRemoved/typeCons... \n", "\n", " violation_ratio total \n", "P2093 0.006190 149770232 \n", "P1476 0.004709 44267638 \n", "P577 0.004130 40156671 \n", "P1433 0.003041 37141627 \n", "P1215 0.009382 33742170 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "typeConstDF['total'] = typeConstDF['correct'] + typeConstDF['incorrect']\n", "typeConstDF.sort_values(by=['total'],ascending=False).head()" ] }, { "cell_type": "markdown", "id": "bearing-kruger", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": null, "id": "assumed-toner", "metadata": {}, "outputs": [], "source": [ "# from tqdm.notebook import tqdm\n", "# import os.path\n", "\n", "# cnt = 0\n", "# fCnt = 1\n", "# for prop in tqdm(df1.node1.unique()):\n", "# try:\n", "# if not(os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv\")):\n", "# continue\n", "# relation = df1[(df1['node1'] == prop) & (df1['label'] == 'P2309')].node2.values[0][0]\n", "# type1 = df1[(df1['node1'] == prop) & (df1['label'] == 'P2316')].node2.values\n", "\n", "# parents = df1[(df1['node1'] == prop) & (df1['label'] == 'P2308')].node2.values[0]\n", "# exceptions = df1[(df1['node1'] == prop) & (df1['label'] == 'P2303')].node2.values\n", "\n", "# # print(prop, relation, type1, parents, exceptions)\n", "\n", "# if relation == \"Q21503252\":\n", "# parentFile = \"P31P279star\"\n", "# parentTitle = 'instanceOf'\n", "# elif relation == \"Q21514624\":\n", "# parentFile = \"P279star\"\n", "# parentTitle = 'subclass'\n", "# 
else:\n", "# parentFile = \"isastar\"\n", "# parentTitle = 'instanceOfOrSubclass'\n", "\n", "# if len(type1) != 0 and type1[0][0] == \"Q21502408\":\n", "# typeVal = \"mandatory\"\n", "# elif len(type1) != 0 and type1[0][0] == \"Q62026391\":\n", "# typeVal = \"suggestion\"\n", "# else:\n", "# typeVal = \"normal\"\n", "\n", "# if len(exceptions):\n", "# exceptionPart = \"or node1 in \" + str(exceptions[0]).replace(\"'\",'\"')\n", "# else:\n", "# exceptionPart = \"\"\n", " \n", "# if cnt % 100 == 0:\n", "# fOP = open(\"../../propertiesSplitWRemoved/checkViolations/TimedTypeConstraintValidator\" + str(fCnt) + \".sh\",\"w\")\n", "# fCnt += 1\n", " \n", "# fOP.write(\"{ time kgtk --debug query -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", "# ../../wikidata-20210215/derived.\" + parentFile + \".tsv.gz \\\n", "# --match 'm: (node1)-[nodeProp]->(node2), d: (node1)-[]->(par)' \\\n", "# --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", "# --return 'nodeProp.id, node1, nodeProp.label, node2' \\\n", "# -o ../../allConstraintsAnalysisWRemoved/TimedTypeConstraint/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", "# --graph-cache ~/sqlite3_caches/const2123_\" + str(fCnt) + \".sqlite3.db; } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/TimedTypeConstraint_TimedTypeConstraintValidator\" + str(fCnt) + \".txt ; \\\n", "# kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", "# --filter-on ../../allConstraintsAnalysisWRemoved/TimedTypeConstraint/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", "# --filter-mode NONE \\\n", "# --input-keys node1 label \\\n", "# --filter-keys node1 label \\\n", "# -o ../../allConstraintsAnalysisWRemoved/TimedTypeConstraint/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv\\n\")\n", "\n", "# cnt += 1\n", "# except:\n", 
"# print(\"Something failed for prop:\",prop)\n", " " ] }, { "cell_type": "code", "execution_count": 93, "id": "veterinary-fault", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "52944ea021934d23b3d4ab3fb1f091f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/122 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for type constraint checks\")" ] }, { "cell_type": "markdown", "id": "intense-computer", "metadata": {}, "source": [ "## Value Type Constraint" ] }, { "cell_type": "markdown", "id": "animated-companion", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 20, "id": "static-profit", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "dfValueType = pd.read_csv('../../constraintsOP/valuetypeConstraint/claims.type-constraints_all1.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 21, "id": "worthy-malawi", "metadata": {}, "outputs": [], "source": [ "dfValueType = dfValueType.groupby(['node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 22, "id": "eleven-tiffany", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
0P1000P2308[Q1241356]
1P1000P2309[Q30208840]
2P1001P2308[Q20926517, Q2881272, Q2882257, Q3624078, Q389...
3P1001P2309[Q30208840]
4P1002P2308[Q2576663]
\n", "
" ], "text/plain": [ " node1 label node2\n", "0 P1000 P2308 [Q1241356]\n", "1 P1000 P2309 [Q30208840]\n", "2 P1001 P2308 [Q20926517, Q2881272, Q2882257, Q3624078, Q389...\n", "3 P1001 P2309 [Q30208840]\n", "4 P1002 P2308 [Q2576663]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType.head()" ] }, { "cell_type": "code", "execution_count": 23, "id": "expired-stuff", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2308', 'P2309', 'P2303', 'P2316', 'P6607', 'P2304'], dtype=object)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType['label'].unique()" ] }, { "cell_type": "code", "execution_count": null, "id": "imposed-newsletter", "metadata": {}, "outputs": [], "source": [ "# Labels are stored as full property ids (e.g. 'P2316'); the bare '2316' used previously\n", "# matched no rows and always produced an empty DataFrame.\n", "dfValueType[dfValueType['label'] == 'P2316']" ] }, { "cell_type": "code", "execution_count": 25, "id": "answering-alabama", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
330P1659P2308[Q18616576]
331P1659P2309[Q21503252]
332P1659P2316[Q21502408]
\n", "
" ], "text/plain": [ " node1 label node2\n", "330 P1659 P2308 [Q18616576]\n", "331 P1659 P2309 [Q21503252]\n", "332 P1659 P2316 [Q21502408]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType[dfValueType['node1'] == 'P1659']" ] }, { "cell_type": "code", "execution_count": 26, "id": "danish-blackberry", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
node1labelnode2
2031P991P2308[Q5, Q7210356]
2032P991P2309[Q21503252]
\n", "
" ], "text/plain": [ " node1 label node2\n", "2031 P991 P2308 [Q5, Q7210356]\n", "2032 P991 P2309 [Q21503252]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfValueType[dfValueType.node1 == 'P991']" ] }, { "cell_type": "markdown", "id": "digital-harvard", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 29, "id": "white-badge", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b96db3afdc724a0496617ce8c5838e6a", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/932 [00:00(node2), \" + parentFile + \": (node2)-[]->(nodex), P279star: (nodex)-[]->(par)' \\\n", " --where 'par in \" + str(parents).replace(\"'\",'\"') + \" \" + exceptionPart + \"' \\\n", " --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/const112_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + typeVal + \"/claims.type-constraints.\" + parentTitle + \".\"+ prop +\".incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\")\n", " \n", " cnt += 1\n", " except:\n", " print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 30, "id": "qualified-cursor", "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "904" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 31, "id": "simplified-cameroon", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,9):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved/checkViolations/valueTypeConstraintValidator\"+str(i)+\".sh\")\n", " " ] }, { "cell_type": "markdown", "id": "spectacular-warner", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 20, "id": "valid-defense", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c502a0c289de4c23b2fa3b794a7ba160", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "11d7ec28a49f4cdaa883b933b055cfaa", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/216 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P85245264[../../allConstraintsAnalysisWRemoved/valuetyp...0.000883
P85316004[../../allConstraintsAnalysisWRemoved/valuetyp...0.002494
P23024791826[../../allConstraintsAnalysisWRemoved/valuetyp...0.000542
P309275347[../../allConstraintsAnalysisWRemoved/valuetyp...0.000928
P30961119310[../../allConstraintsAnalysisWRemoved/valuetyp...0.000893
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P852 4526 4 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P853 1600 4 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P2302 47918 26 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P3092 7534 7 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P3096 11193 10 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "\n", " violation_ratio \n", "P852 0.000883 \n", "P853 0.002494 \n", "P2302 0.000542 \n", "P3092 0.000928 \n", "P3096 0.000893 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF.head()" ] }, { "cell_type": "code", "execution_count": 26, "id": "neural-trail", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P50080341961[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P610409808[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P254501378[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P26680179[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P7374044[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P3028015[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P2839015[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P3027013[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P2127012[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P538010[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P224106[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P442506[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P619105[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P653305[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
P653405[../../allConstraintsAnalysisWRemoved/valuetyp...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5008 0 341961 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P6104 0 9808 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P2545 0 1378 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P2668 0 179 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P7374 0 44 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P3028 0 15 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P2839 0 15 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P3027 0 13 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P2127 0 12 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P538 0 10 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P2241 0 6 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P4425 0 6 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P6191 0 5 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P6533 0 5 [../../allConstraintsAnalysisWRemoved/valuetyp... \n", "P6534 0 5 [../../allConstraintsAnalysisWRemoved/valuetyp... 
\n", "\n", " violation_ratio \n", "P5008 1.0 \n", "P6104 1.0 \n", "P2545 1.0 \n", "P2668 1.0 \n", "P7374 1.0 \n", "P3028 1.0 \n", "P2839 1.0 \n", "P3027 1.0 \n", "P2127 1.0 \n", "P538 1.0 \n", "P2241 1.0 \n", "P4425 1.0 \n", "P6191 1.0 \n", "P6533 1.0 \n", "P6534 1.0 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF.sort_values(by=['violation_ratio','incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 27, "id": "cutting-polyester", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 904.000000\n", "mean 0.098485\n", "std 0.214803\n", "min 0.000000\n", "25% 0.001492\n", "50% 0.011225\n", "75% 0.063950\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valTypeConstDF['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 28, "id": "alert-receiver", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Value Type Constraint Violation Ratios')" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "valTypeConstDF['violation_ratio'].plot.hist(bins=100).set_title(\"Value Type Constraint Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 29, "id": "italian-motel", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Value Type Constraint Violation Ratios (<=0.04)')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "valTypeConstDF[valTypeConstDF['violation_ratio'] <= 0.04].violation_ratio.plot.hist(bins=100).set_title(\"Value Type Constraint Violation Ratios (<=0.04)\")" ] }, { "cell_type": "code", "execution_count": null, "id": "prescription-ceramic", "metadata": {}, "outputs": [], "source": [ "# Use the mean computed from the data as the threshold. The previous hard-coded 3.950680\n", "# exceeded every violation_ratio value, so the reported count was always 0.\n", "print(f\"No. of constraints whose violation ratio is greater than mean :{sum(valTypeConstDF['violation_ratio'] >= valTypeConstDF['violation_ratio'].mean())}/{len(valTypeConstDF)}\")" ] }, { "cell_type": "code", "execution_count": 31, "id": "quiet-gardening", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# valTypeConstDF.sort_values(by=['violation_ratio'],ascending=False).head().paths.values" ] }, { "cell_type": "code", "execution_count": 32, "id": "documentary-pipeline", "metadata": {}, "outputs": [], "source": [ "# !head ../../allConstraintsAnalysisWRemoved/typeConstraint/normal/claims.type-constraints.instanceOf.P7535.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 33, "id": "tutorial-mineral", "metadata": {}, "outputs": [], "source": [ "for key1 in valueTypeConstViolations.keys():\n", " valueTypeConstViolations[key1]['correct'] = valueTypeConstViolations[key1]['instanceOf']['correct'] + valueTypeConstViolations[key1]['subclass']['correct'] + valueTypeConstViolations[key1]['instanceOfOrSubclass']['correct']\n", " valueTypeConstViolations[key1]['incorrect'] = valueTypeConstViolations[key1]['instanceOf']['incorrect'] + valueTypeConstViolations[key1]['subclass']['incorrect'] + valueTypeConstViolations[key1]['instanceOfOrSubclass']['incorrect']\n", " valueTypeConstViolations[key1]['VR'] = valueTypeConstViolations[key1]['incorrect'] / (valueTypeConstViolations[key1]['correct'] + valueTypeConstViolations[key1]['incorrect'])\n", " " ] }, { "cell_type": "code", 
"execution_count": 34, "id": "satellite-concern", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "{'mandatory': {'instanceOf': {'correct': 11564885, 'incorrect': 8245},\n", " 'subclass': {'correct': 55983, 'incorrect': 28},\n", " 'instanceOfOrSubclass': {'correct': 13090, 'incorrect': 137},\n", " 'propCount': 108,\n", " 'correct': 11633958,\n", " 'incorrect': 8410,\n", " 'VR': 0.0007223616363956198},\n", " 'suggestion': {'instanceOf': {'correct': 46189, 'incorrect': 659},\n", " 'subclass': {'correct': 127, 'incorrect': 20},\n", " 'instanceOfOrSubclass': {'correct': 0, 'incorrect': 0},\n", " 'propCount': 5,\n", " 'correct': 46316,\n", " 'incorrect': 679,\n", " 'VR': 0.01444834556867752},\n", " 'normal': {'instanceOf': {'correct': 94112173, 'incorrect': 842434},\n", " 'subclass': {'correct': 4674914, 'incorrect': 9777},\n", " 'instanceOfOrSubclass': {'correct': 77686561, 'incorrect': 289299},\n", " 'propCount': 791,\n", " 'correct': 176473648,\n", " 'incorrect': 1141510,\n", " 'VR': 0.006426872643381034}}" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valueTypeConstViolations" ] }, { "cell_type": "markdown", "id": "traditional-shakespeare", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 78, "id": "spoken-symphony", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "878ab763f4fa4cb9a540c8bf86ea76ec", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/297 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for value type constraint checks\")" ] }, { "cell_type": "markdown", "id": "motivated-sympathy", "metadata": {}, "source": [ "## Item Requires Statement Constraint" ] }, { "cell_type": "markdown", "id": 
"chubby-glass", "metadata": {}, "source": [ "### Understand Constraints File" ] }, { "cell_type": "code", "execution_count": 35, "id": "funny-batch", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/itemRequiresConstraint/claims.type-constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 36, "id": "original-expression", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 37, "id": "adequate-symphony", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2306', 'P2305', 'P2316', 'P2304', 'P2303', 'P6607', 'P4155',\n", " 'P31', 'P2916', 'P4680', 'P2308'], dtype=object)" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 38, "id": "infrared-canal", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2306 7182\n", "P2305 2540\n", "P2316 2523\n", "P2303 422\n", "P2304 14\n", "P6607 14\n", "P2916 5\n", "P4680 2\n", "P4155 1\n", "P2308 1\n", "P31 1\n", "Name: label, dtype: int64" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 39, "id": "focused-karen", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 40, "id": "private-boundary", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1id
P1006P1006-P2302-Q21503247-0451ef47-0NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
P1010P1010-P2302-Q21503247-56183614-0NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1010-P2302-Q21503247-fd256eaf-0NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1015P1015-P2302-Q21503247-20e3bfc5-0NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1017P1017-P2302-Q21503247-bbac2ce3-0NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 \\\n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN NaN NaN [P214] NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN NaN NaN [P31] NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 NaN NaN [Q794] [P17] NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN NaN NaN [P31] NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN NaN NaN [P214] NaN \n", "\n", "label P2316 P2916 P31 P4155 P4680 \\\n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN NaN NaN NaN NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN NaN NaN NaN NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 [Q21502408] NaN NaN NaN NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN NaN NaN NaN NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN NaN NaN NaN NaN \n", "\n", "label P6607 \n", "node1 id \n", "P1006 P1006-P2302-Q21503247-0451ef47-0 NaN \n", "P1010 P1010-P2302-Q21503247-56183614-0 NaN \n", " P1010-P2302-Q21503247-fd256eaf-0 NaN \n", "P1015 P1015-P2302-Q21503247-20e3bfc5-0 NaN \n", "P1017 P1017-P2302-Q21503247-bbac2ce3-0 NaN " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 41, "id": "conceptual-schedule", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 42, "id": "third-hayes", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P1006NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
P1010NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1010NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1015NaNNaNNaN[P31]NaNNaNNaNNaNNaNNaNNaN
P1017NaNNaNNaN[P214]NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 P2316 P2916 P31 P4155 P4680 \\\n", "node1 \n", "P1006 NaN NaN NaN [P214] NaN NaN NaN NaN NaN NaN \n", "P1010 NaN NaN NaN [P31] NaN NaN NaN NaN NaN NaN \n", "P1010 NaN NaN [Q794] [P17] NaN [Q21502408] NaN NaN NaN NaN \n", "P1015 NaN NaN NaN [P31] NaN NaN NaN NaN NaN NaN \n", "P1017 NaN NaN NaN [P214] NaN NaN NaN NaN NaN NaN \n", "\n", "label P6607 \n", "node1 \n", "P1006 NaN \n", "P1010 NaN \n", "P1010 NaN \n", "P1015 NaN \n", "P1017 NaN " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "shaped-companion", "metadata": {}, "source": [ "However, there is one anomaly where the property does not have a co-dependency constraint associated with it, but still has a link to this constraint." ] }, { "cell_type": "code", "execution_count": 43, "id": "indian-journal", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P5447NaNNaN[Q55426051][P5446]NaNNaNNaNNaNNaN[Q46466783]NaN
P5448NaNNaN[Q55426051][P5446]NaNNaNNaNNaNNaN[Q46466783]NaN
\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 P2308 P2316 P2916 P31 P4155 \\\n", "node1 \n", "P5447 NaN NaN [Q55426051] [P5446] NaN NaN NaN NaN NaN \n", "P5448 NaN NaN [Q55426051] [P5446] NaN NaN NaN NaN NaN \n", "\n", "label P4680 P6607 \n", "node1 \n", "P5447 [Q46466783] NaN \n", "P5448 [Q46466783] NaN " ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires[dfItemRequires['P4680'].apply(lambda p: type(p) == list)]" ] }, { "cell_type": "code", "execution_count": 44, "id": "discrete-template", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2304P2305P2306P2308P2316P2916P31P4155P4680P6607
node1
P1010NaNNaN[Q794][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P1045NaNNaN[Q20808382, Q28218485, Q3044918][P39]NaNNaNNaNNaNNaNNaNNaN
P1045NaNNaN[Q82955][P106]NaNNaNNaNNaNNaNNaNNaN
P1045NaNNaN[Q5][P31]NaN[Q21502408]NaNNaNNaNNaNNaN
P1045NaNNaN[Q142, Q71084][P27]NaNNaNNaNNaNNaNNaNNaN
....................................
P980NaNNaN[Q34][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P981NaNNaN[Q55][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P981NaNNaN[Q1852859][P31]NaNNaNNaNNaNNaNNaNNaN
P988NaNNaN[Q928][P17]NaN[Q21502408]NaNNaNNaNNaNNaN
P990[Q49678, Q853715]NaN[Q5][P31]NaNNaNNaNNaNNaNNaNNaN
\n", "

2540 rows × 11 columns

\n", "
" ], "text/plain": [ "label P2303 P2304 P2305 P2306 \\\n", "node1 \n", "P1010 NaN NaN [Q794] [P17] \n", "P1045 NaN NaN [Q20808382, Q28218485, Q3044918] [P39] \n", "P1045 NaN NaN [Q82955] [P106] \n", "P1045 NaN NaN [Q5] [P31] \n", "P1045 NaN NaN [Q142, Q71084] [P27] \n", "... ... ... ... ... \n", "P980 NaN NaN [Q34] [P17] \n", "P981 NaN NaN [Q55] [P17] \n", "P981 NaN NaN [Q1852859] [P31] \n", "P988 NaN NaN [Q928] [P17] \n", "P990 [Q49678, Q853715] NaN [Q5] [P31] \n", "\n", "label P2308 P2316 P2916 P31 P4155 P4680 P6607 \n", "node1 \n", "P1010 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "P1045 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P1045 NaN NaN NaN NaN NaN NaN NaN \n", "... ... ... ... ... ... ... ... \n", "P980 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P981 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P981 NaN NaN NaN NaN NaN NaN NaN \n", "P988 NaN [Q21502408] NaN NaN NaN NaN NaN \n", "P990 NaN NaN NaN NaN NaN NaN NaN \n", "\n", "[2540 rows x 11 columns]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires[dfItemRequires['P2305'].apply(lambda p: type(p) == list)]" ] }, { "cell_type": "markdown", "id": "forced-christmas", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "markdown", "id": "acquired-floor", "metadata": {}, "source": [ "#### Version 1 - Mandatory + Suggestion + Normal" ] }, { "cell_type": "code", "execution_count": 45, "id": "turkish-establishment", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "58498333945f45ad9e343d82c18fdfb6", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory + suggestion + 
normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 100 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug 
ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + 
\"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 46, "id": "peripheral-herald", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2538" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 47, "id": "incorporated-logistics", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "26" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fCnt" ] }, { "cell_type": "code", 
"execution_count": 123, "id": "welcome-welding", "metadata": {}, "outputs": [], "source": [ "# from tqdm.notebook import tqdm\n", "# import os.path\n", "# import os\n", "# folderName = 'codependencyConstraint'\n", "# for prop in tqdm(dfItemRequires.index.unique()):\n", "# for subFolderName in ['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal']:\n", "# if os.path.isfile(\"../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv\") and \\\n", "# os.path.isfile(\"../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv\"):\n", "# os.system(\"kgtk cat -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", "# ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", "# -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_w_exceptions.tsv\")" ] }, { "cell_type": "code", "execution_count": 48, "id": "optimum-blowing", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,28):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved/checkViolations/codepConst_MSN_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "indoor-verse", "metadata": {}, "source": [ "#### Version 2 - Mandatory + Normal" ] }, { "cell_type": "code", "execution_count": 51, "id": "furnished-paradise", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1f146f81033b43b7af22588beedb3d15", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet 
= mandatory + normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 100 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", 
" kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + 
subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 52, "id": "searching-individual", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2419" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 53, "id": "silver-clarity", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,26):\n", "# os.system(\"screen -dm sh 
../../propertiesSplitWRemoved/checkViolations/codepConst_MN_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "prescription-access", "metadata": {}, "source": [ "#### Version 3 - Mandatory" ] }, { "cell_type": "code", "execution_count": 54, "id": "married-porter", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "932a7247db9b47568838f6219c1e2119", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = mandatory\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + 
commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 100 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " 
--filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/timeLog_\" + 
shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 55, "id": "according-blackberry", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1089" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 56, "id": "extraordinary-drawing", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,12):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved/checkViolations/codepConst_M_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "subsequent-brown", "metadata": {}, "source": [ "#### Version 4 - Normal" ] }, { "cell_type": "code", "execution_count": 57, "id": "operational-migration", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d81d5232c18940e59e50956651839125", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = normal\n", " \n", " if len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = 
set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 100 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " 
fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o 
../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 58, "id": "harmful-binary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1925" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 59, "id": "advance-married", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,21):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved/checkViolations/codepConst_N_Validator_new2_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "ranging-journal", "metadata": {}, "source": [ "#### Version 5 - Suggestion" ] }, { "cell_type": "code", "execution_count": 60, "id": "missing-jordan", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "174a69094d0b4d30aeeba66e2eb28a5c", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3147 [00:00(node2)\"]\n", " commandWhere = \" --where '\"\n", " commandWhere = []\n", " \n", " # Version 5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", " constSet = suggestion\n", " \n", " if 
len(constSet) == 0:\n", " continue\n", " excptns = set()\n", " for (rowNo, constraint) in enumerate(constSet):\n", " prop2 = constraint['P2306']\n", " \n", " if type(prop2) != list:\n", " continue\n", " prop2 = prop2[0]\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv\")):\n", " print(f\"File: ../../propertiesSplitWRemoved/claims.{prop2}.tsv does not exist\")\n", " continue\n", " \n", " commandOtherFiles += \"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv \"\n", " if type(constraint['P2303']) == list: # Exceptions present\n", " if len(excptns) == 0:\n", " excptns = set(constraint['P2303'])\n", " else:\n", " excptns = excptns.intersection(set(constraint['P2303']))\n", " if type(constraint['P2305']) == list:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->(node2_{prop2})\"]\n", " commandWhere += [\"node2_\" + prop2 + \" in \" + str(list(constraint['P2305'])).replace(\"'\",'\"')]\n", " else:\n", " commandMatchMoreFiles += [f\"{prop2}: (node1)-[]->()\"]\n", "# print(commandMatchMoreFiles)\n", " if len(commandWhere) == 0:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\"\n", " else:\n", " command = commandInit + commandOtherFiles + commandMatch + (\", \".join(commandMatchMoreFiles)) + \"'\" + \" --where '\"+(\" and \".join(commandWhere))+\"'\"\n", " \n", " if cnt % 100 == 0:\n", " if fOP:\n", " fOP.close()\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " if len(excptns) == 0:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i 
../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", " fOP.write(command)\n", " else:\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv\\n\"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + 
prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/timeLog_\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 61, "id": "soviet-forth", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "312" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 62, "id": "racial-stationery", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,5):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved/checkViolations/codepConst_S_Validator_new_3_\"+str(i)+\".sh\")" ] }, { 
"cell_type": "markdown", "id": "structural-envelope", "metadata": {}, "source": [ "### Merge all correct/incorrect outputs" ] }, { "cell_type": "code", "execution_count": 17, "id": "joined-invention", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "57dcbdd4c8014c9288dbb92b331a05a6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# import os\n", "# from tqdm.notebook import tqdm\n", "\n", "# for folder in tqdm(iter(['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal'])):\n", "# folderPath = \"../../allConstraintsAnalysisWRemoved/codependencyConstraint/\" + folder + \"/\"\n", "# correct_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".correct.\" in f, os.listdir(folderPath))])\n", "# incorrect_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".incorrect.\" in f, os.listdir(folderPath))])\n", "# # print(files_list)\n", "# os.system(\"{ kgtk cat -i \"+ correct_files_list + \" -o \"+folderPath+\"claims.all.correctSuperSet.tsv -v True; } 2> \"+folderPath+\"claims.all.correctSuperSet.log\")\n", "# os.system(\"{ kgtk cat -i \"+ incorrect_files_list + \" -o \"+folderPath+\"claims.all.incorrectSuperSet.tsv -v True; } 2> \"+folderPath+\"claims.all.incorrectSuperSet.log\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "stopped-bolivia", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "68395f72036a469fad8908d916303bcd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# import os\n", "# from tqdm.notebook import tqdm\n", "\n", "# for folder in tqdm(iter(['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal'])):\n", "# folderPath = \"../../allConstraintsAnalysisWRemoved/codependencyConstraint_Final/\" + folder + \"/\"\n", "# 
correct_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".correct.\" in f, os.listdir(folderPath))])\n", "# incorrect_files_list = \" \".join([folderPath + f for f in filter(lambda f: \".incorrect.\" in f, os.listdir(folderPath))])\n", "# # print(files_list)\n", "# os.system(\"{ kgtk cat -i \"+ correct_files_list + \" -o \"+folderPath+\"claims.all.correctSuperSet.tsv; } 2> \"+folderPath+\"claims.all.correctSuperSet.log\")\n", "# os.system(\"{ kgtk cat -i \"+ incorrect_files_list + \" -o \"+folderPath+\"claims.all.incorrectSuperSet.tsv; } 2> \"+folderPath+\"claims.all.incorrectSuperSet.log\")" ] }, { "cell_type": "code", "execution_count": null, "id": "criminal-central", "metadata": {}, "outputs": [], "source": [ "# for folder in tqdm(iter(['Mand_Sugg_Normal', 'Mand_Normal', 'Mand', 'Normal'])):\n", "# folderPath = \"../../allConstraintsAnalysisWRemoved/codependencyConstraint/\" + folder + \"/\"\n", "# folderPathNew = \"../../allConstraintsAnalysisWRemoved/codependencyConstraint_Final/\" + folder + \"/\"\n", "# os.system(f\"screen -dm kgtk ifnotexists -i {folderPathNew}claims.all.correctSuperSet.tsv --filter-on {folderPath}claims.all.correctSuperSet.tsv -o {folderPathNew}claims.all.correctSuperSet.diff.tsv\")\n", "# os.system(f\"screen -dm kgtk ifnotexists -i {folderPathNew}claims.all.incorrectSuperSet.tsv --filter-on {folderPath}claims.all.incorrectSuperSet.tsv -o {folderPathNew}claims.all.incorrectSuperSet.diff.tsv\")\n", " " ] }, { "cell_type": "markdown", "id": "homeless-pleasure", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 45, "id": "welcome-dependence", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "22a3c57a824e437b9c097ce45f245631", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { 
"application/vnd.jupyter.widget-view+json": { "model_id": "7dd9536a9ed647de9171b7e41388b861", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/5781 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P630742[../../allConstraintsAnalysisWRemoved/codepend...
P322531308[../../allConstraintsAnalysisWRemoved/codepend...
P1336232999[../../allConstraintsAnalysisWRemoved/codepend...
P881010[../../allConstraintsAnalysisWRemoved/codepend...
P667073[../../allConstraintsAnalysisWRemoved/codepend...
............
P1884891323[../../allConstraintsAnalysisWRemoved/codepend...
P1886263[../../allConstraintsAnalysisWRemoved/codepend...
P188812848[../../allConstraintsAnalysisWRemoved/codepend...
P18943848[../../allConstraintsAnalysisWRemoved/codepend...
P18954574[../../allConstraintsAnalysisWRemoved/codepend...
\n", "

2538 rows × 3 columns

\n", "" ], "text/plain": [ " correct incorrect paths\n", "P6307 4 2 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P3225 31 308 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1336 232 999 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P8810 1 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P6670 7 3 [../../allConstraintsAnalysisWRemoved/codepend...\n", "... ... ... ...\n", "P1884 8913 23 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1886 26 3 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1888 128 48 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1894 38 48 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1895 457 4 [../../allConstraintsAnalysisWRemoved/codepend...\n", "\n", "[2538 rows x 3 columns]" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1" ] }, { "cell_type": "code", "execution_count": 53, "id": "powered-residence", "metadata": {}, "outputs": [], "source": [ "# violation_ratio = incorrect / (correct + incorrect), computed column-wise on floats.\n", "# A property with no checked statements (correct + incorrect == 0) yields NaN here,\n", "# whereas the row-wise division could raise ZeroDivisionError on plain Python ints.\n", "codepConstDF1['violation_ratio'] = (\n", "    codepConstDF1['incorrect'].astype(float)\n", "    / (codepConstDF1['correct'] + codepConstDF1['incorrect']).astype(float)\n", ")" ] }, { "cell_type": "code", "execution_count": 54, "id": "chinese-pressing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P467404[../../allConstraintsAnalysisWRemoved/codepend...1.0
P464506[../../allConstraintsAnalysisWRemoved/codepend...1.0
P433401[../../allConstraintsAnalysisWRemoved/codepend...1.0
P50701[../../allConstraintsAnalysisWRemoved/codepend...1.0
P230401[../../allConstraintsAnalysisWRemoved/codepend...1.0
P258601[../../allConstraintsAnalysisWRemoved/codepend...1.0
P794801[../../allConstraintsAnalysisWRemoved/codepend...1.0
P574501[../../allConstraintsAnalysisWRemoved/codepend...1.0
P726301[../../allConstraintsAnalysisWRemoved/codepend...1.0
P664502[../../allConstraintsAnalysisWRemoved/codepend...1.0
P675901[../../allConstraintsAnalysisWRemoved/codepend...1.0
P155505[../../allConstraintsAnalysisWRemoved/codepend...1.0
P822904[../../allConstraintsAnalysisWRemoved/codepend...1.0
P828001[../../allConstraintsAnalysisWRemoved/codepend...1.0
P183701[../../allConstraintsAnalysisWRemoved/codepend...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P4674 0 4 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P4645 0 6 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P4334 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P507 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2304 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2586 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P7948 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P5745 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P7263 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P6645 0 2 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P6759 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1555 0 5 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P8229 0 4 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P8280 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1837 0 1 [../../allConstraintsAnalysisWRemoved/codepend... 
\n", "\n", " violation_ratio \n", "P4674 1.0 \n", "P4645 1.0 \n", "P4334 1.0 \n", "P507 1.0 \n", "P2304 1.0 \n", "P2586 1.0 \n", "P7948 1.0 \n", "P5745 1.0 \n", "P7263 1.0 \n", "P6645 1.0 \n", "P6759 1.0 \n", "P1555 1.0 \n", "P8229 1.0 \n", "P8280 1.0 \n", "P1837 1.0 " ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 55, "id": "armed-constitution", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../../allConstraintsAnalysisWRemoved/codependencyConstraint_Final/Mand_Sugg_Normal/claims.P2302.correct.tsv',\n", " '../../allConstraintsAnalysisWRemoved/codependencyConstraint_Final/Mand_Sugg_Normal/claims.P2302.incorrect.tsv']" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(codepConstDF1.loc['P2302']['paths'])" ] }, { "cell_type": "code", "execution_count": 56, "id": "continued-desire", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "P10-P2302-Q21502404-d012aef4-0\tP10\tP2302\tQ21502404\tnormal\twikibase-item\r\n", "P10-P2302-Q21510851-5224fe0b-0\tP10\tP2302\tQ21510851\tnormal\twikibase-item\r\n", "P10-P2302-Q21510852-dde2f0ce-0\tP10\tP2302\tQ21510852\tnormal\twikibase-item\r\n", "P10-P2302-Q52004125-d0288d06-0\tP10\tP2302\tQ52004125\tnormal\twikibase-item\r\n", "P10-P2302-Q53869507-974ce3b1-0\tP10\tP2302\tQ53869507\tnormal\twikibase-item\r\n", "P1000-P2302-Q21510856-b2772a67-0\tP1000\tP2302\tQ21510856\tnormal\twikibase-item\r\n", "P1000-P2302-Q21510865-1f5093e9-0\tP1000\tP2302\tQ21510865\tnormal\twikibase-item\r\n", "P1000-P2302-Q53869507-36dbee67-0\tP1000\tP2302\tQ53869507\tnormal\twikibase-item\r\n", "P1001-P2302-Q21502838-3cc7ade2-0\tP1001\tP2302\tQ21502838\tnormal\twikibase-item\r\n" ] } ], "source": [ "!head 
../../allConstraintsAnalysisWRemoved/codependencyConstraint_Final/Mand_Sugg_Normal/claims.P2302.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 57, "id": "demonstrated-debut", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2347232997624[../../allConstraintsAnalysisWRemoved/codepend...0.992803
P7342119055738534[../../allConstraintsAnalysisWRemoved/codepend...0.258447
P22142989348721476[../../allConstraintsAnalysisWRemoved/codepend...0.194425
P43331113458436934[../../allConstraintsAnalysisWRemoved/codepend...0.013849
P1951150231401686[../../allConstraintsAnalysisWRemoved/codepend...0.258832
P5696926852267556[../../allConstraintsAnalysisWRemoved/codepend...0.037189
P13111274319200519[../../allConstraintsAnalysisWRemoved/codepend...0.017475
P2757295163856[../../allConstraintsAnalysisWRemoved/codepend...0.957377
P2860174842113152425[../../allConstraintsAnalysisWRemoved/codepend...0.000871
P5703471930105652[../../allConstraintsAnalysisWRemoved/codepend...0.029532
P1435212163792401[../../allConstraintsAnalysisWRemoved/codepend...0.041734
P20178677090445[../../allConstraintsAnalysisWRemoved/codepend...0.510369
P1376280679392[../../allConstraintsAnalysisWRemoved/codepend...0.965863
P79021292468302[../../allConstraintsAnalysisWRemoved/codepend...0.840888
P19223063663745[../../allConstraintsAnalysisWRemoved/codepend...0.675401
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P234 7232 997624 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P734 2119055 738534 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2214 2989348 721476 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P433 31113458 436934 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P195 1150231 401686 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P569 6926852 267556 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P131 11274319 200519 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P275 7295 163856 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2860 174842113 152425 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P570 3471930 105652 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1435 2121637 92401 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2017 86770 90445 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1376 2806 79392 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P7902 12924 68302 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1922 30636 63745 [../../allConstraintsAnalysisWRemoved/codepend... 
\n", "\n", " violation_ratio \n", "P234 0.992803 \n", "P734 0.258447 \n", "P2214 0.194425 \n", "P433 0.013849 \n", "P195 0.258832 \n", "P569 0.037189 \n", "P131 0.017475 \n", "P275 0.957377 \n", "P2860 0.000871 \n", "P570 0.029532 \n", "P1435 0.041734 \n", "P2017 0.510369 \n", "P1376 0.965863 \n", "P7902 0.840888 \n", "P1922 0.675401 " ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 58, "id": "developed-zimbabwe", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 2538.000000\n", "mean 0.270460\n", "std 0.348561\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.066600\n", "75% 0.500000\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF1['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 59, "id": "unknown-johnston", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 1 - Violation Ratios')" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEICAYAAACuxNj9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAd4klEQVR4nO3deZwdVZ338c8XOuyEAIkMJoEGCSrq4yNGxRXGIEtYwjyK4sZiBBccdfBRoo8jvNQZ44yCMg8uURiCOgiiQkZARJZxBQyILAGGBgNJCBAxhE1B5Dd/nHPh5tK3T3Xfvkt3f9+v13111alTVb9Tt2797qmqrquIwMzMbCgbdDsAMzPrfU4WZmZW5GRhZmZFThZmZlbkZGFmZkVOFmZmVuRkMQ5J6pcUkvq6HctYJ+ntkn7S7ThG23hpl6RPSPpmxbrLJe09wvW8VtKtI5m30yTdJGmv0V7uuE4Wkt4maamkhyWtlnSRpNeMcFlnSHpc0kP5daOkz0naarTjHo8kTZb0JUl35ffj9jw+tY3rPEPSZ1tZRkR8JyL2qbi+IyX9YqTrknRYPqCpobxP0n2SDhzpshsNp13DIWkjSefmdkQrB60q2yMi/jki3t1q3IOsOyTtUhuPiJ9HxHPbsJ7aF7uH82u5pAXDmP8Z+3hEvCAirhjtWMdtspB0HPAl4J+B7YAdgK8A81pY7L9ExJbANOAoYA/gl5I2by3a8U3SRsClwAuA/YDJwCuB+4GXdzGuXut5nQdMAfZsKN8PCODHVRekpFuf718A7wDuaXE55zFK22MMmBIRWwBvAv5R0hu6HdAzRMS4ewFbAQ8Dhw5RZ2NSMrk7v74EbDxE/TOAzzaUbQmsBj5QV/Yu4GZgLXAxsGPdtAA+CNwB/AH4V2CDYcz7XuA24AHgVEB52obAF/Iy7wCOzfX76rbHaTnWVcBngQ3ztCNJH+4v5PX+Hti/br3bAP+et9Fa4LxcfiNwUF29SXn9Lxlk270buBfYYojt+3zgity2m4CDG7b9qcAFwEPAVcBz8jQBJwP3AQ8CNwAvBI4B/gI8nveF/8z1lwPHA9cDjwF9wALg9rzsZcDf1a37SOAXpfchx/9n4K95fQ+McN9dBJzeUHYOcHIe3gP4VV7374C96updAfwT8EvgT8AuOf47ctt+D7y9SbteBfwGWJf/vqphuZ/Jy30I+AkwtUJbVtbH16btcSLw7bppB+f954Ec9/Prpi0H9s7DLwd+neutBv4/sFGe9rP8Pj+S38u3AHsBK1vdXwdpXz91n9VcdjXw0brx75ES77oc2wty+VD7eK2dTY9zwFTgR7kNfwR+Tt3x6BmxtvJG9uqL9M3jifo3YJA6nwauBJ5F6in8CvjMEPXPoCFZ5PIzgbPz8DxgIO9IfcAngV/V1Q3gctIBeAfgv4F3D2PeH5G+ae0ArAH2y9PeC9wCzMzLvpz1k8UPga8Dm+f2Xg28J087Mu9wR5OSzvvyTlVLRBcAZwNbkxLCnrn8Y7V218V/Q5Nt911g8RDbdlJu+yeAjYDXkz5kz63b9rVeSB/wHeC7edq+wDV5u9QO2ts3e89IH6Tr8rbaNJcdCjyb1NN+C+kgsX3d9mlMFs3eh/XqjnDffTUp6dVi24p04P/fwPS8HebmWN+Qx6flulcAd5F6cH153gfrtuP2PH2geSrWvM+sBd6Z53trHt+2brm3A7sCm+bxhRXaMhrJoun2yOMnkpNFju+RvF0mkfbRAZ5OAst5+iD6UlLi7SMdsG8GPtzwPu9SN74XOVnQwv46SPv6Wf+zugfwKOt/YXkX6Ytp7cB/3VDHpYZ2Nj3OAZ8DvpbbMwl4LflzP2isrbyRvfoC3g7cU6hzOzC3bnxfYPkQ9Z/xpuTyhcAlefgiYH7dtA3yG79j3Q64X9309wOXDmPe19RNPwdYkIcvA95bN22f2g5IOgX3GPnDlqe/Fbg8Dx8JDNRN2yzP+zekg8uTwNaDtPvZ+QMyOY+fC3y
syba7hCEOLnknvYf1e1lnASfWbftv1k2bC9ySh19PSrp70PCtaIgP0rsK+8Z1wLy67dOYLJq9D+vVbWH/vQ14Wx4+GvhdHj4e+FZD3YuBI/LwFcCn66ZtTvrW+Mb6978xVlKSuLph+q+BI+uW+8mG/fbHFdrRcrIYanvk8RN5Oln8I3BOw2doVS0G6g6ig6zjw8APG97nZslixPvrIOvtz+t6gJQEg9TLH/SgTfqSEsBWhX28liyaHudIieT8+nYO9Rqv1yzuB6YWzkk/G7izbvzOXFa7w6J2welrhXVNJ3XhAHYEvizpAUkP5HLlOjUrBltnxXnrzwE/CmxR15bG5dbsSPrWsLpu2V8nfdN4xnIj4tE8uAXp2/cfI2JtY6Mj4m7SaYk3SpoC7E/6BjWY+0mJp5lnAysi4smGNhTbHhGXkU4hnArcJ2mRpMlDrAvW31ZIOlzSdXXb54WkLnozzd6HIeU7amr71U1DVD0TODwPvzOPQ3ovD63FmWN9Detv26faFhGPkHpK7yW9/xdIet4g62v8LEDF7d+KUdgejdZrR96fVrB+O2rr3lXSjyTdI+lB0rXNqjdbjHh/HcLUXOcjpMQ0Kce5oaSF+YaQB0mJoFa/aqyDHudIp8EHgJ9IuqN0YX28Jotfk75NHzJEnbtJH76aHXIZke6w2CK/3ttsAZK2APYmneuDtGO+JyKm1L02jYhf1c02c7B1Vpy3mdWDLLdmBWlbTK1b7uSIeEGF5a4AtsnJYDCLSRcyDwV+HRGrmtT7KbDvEDcC3A3MbLgguwPpW2FRRJwSES8FdiOdivhobVKzWWoDknYEvgF8gHTaZQrpeowGn3XoUApx/rxuvxpq+38LmCPplaQeUy0JryD1LOr3kc0jYmGzGCLi4oh4Aymh3EJqa6PGzwIMY/uP1Chsj0brtSPfRTWTwdvxVdL2mBURk0mnlKq+5y3tr81ExF8j4iTSta/35+K3kU7x7k06Bdefy2uxDrnPMfRx7qGI+EhE7Ey61nOcpDnNFjQuk0VErAM+BZwq6RBJm0maJGl/Sf+Sq50FfFLStHz75qeAb1dZvqSNJb2UdLfGWtIFYEjn/z4u6QW53laSDm2Y/aOStpY0E/gQ6XpA1XmbOQf4oKQZkrYmXbCtbYvVpAuSX8y3r24g6TmS9iwtNM97EfCVHPMkSa+rq3IesHtuR7Nve5A+7CuA70t6Xo5h29yDm0u6APgo8LG8jr2Ag0jXOoYk6WWSXiFpEul89Z9Jp84gXVTfubCIzUkfuDV5eUeRehYjcS8wI9/9NWIRsZx008FZpFOctW+p3wYOkrRv/sa5iaS9JM0YbDmStpM0Lyfpx0gXQZ8cpOqFwK5Kt5r3SXoLKfH+aCTx58/HJnl0oxznSJIvMOT2aHQOcICkOXl/+Aip3YN94dqSdC3k4dzbel/D9KH2nRHvrxUtzMveJMf5GKl3vhmpB1Q1ThjiOCfpQEm75PdmHenmjMH2D2CcJguAiPgicBzpQvEa0sHqA6QDHKQ7gpaS7oq5Abg2lw3lY5IeIr1xZ5IurL4qd/eJiB8Cnwe+m7uMN5JOz9Q7P893Heni8WnDmLeZb5DOXf8ut+MHDdMPJ12IW0ZKbucy9Gmheu8kXQC/hXTH0YdrEyLiT8D3gZ0GWSd19R4jfTO6hXT94kHSRfapwFUR8Tjpw7Y/6Y6qrwCHR8QtFeKbTGr/WlIX+35S9xrStt0tn7I5r0lsy4Avknqj9wIvIp1eG4nLSHfG3CPpDyNcRs1i0jfCp5JwRKwgfcv8BE/v0x+l+ed4A9Jn4G7Sac09eeZBkYi4HziQdHC9n3Rh+MCIGGkbbiWdf59O2i//xDN7LsP1jO3RKCJuJfV0/420Hx1EumPv8UGq/1/St/aHSPvP2Q3TTwQW533nzQ3raWV/reIC0v58NKm9d5J6LctIF6vrlfbxoY5zs0i9/odJ+/9XIuLyZkHV7nixDpAUpG7vQLdjGS2SPgXsGhH
v6HYsZtY+vfZPSTaGSNoGmE/qfZjZODZuT0NZe0k6mnQa5KKI+Fm34zGz9vJpKDMzK3LPwszMisblNYupU6dGf39/t8MwMxtTrrnmmj9ExLTBpo3LZNHf38/SpUu7HYaZ2ZgiqfE/+Z/i01BmZlbkZGFmZkVOFmZmVuRkYWZmRW1LFpJOV/qd3BvryraRdImk2/LfrXO5JJ0iaUDS9ZJ2r5vniFz/NklHtCteMzNrrp09izNIv1hXbwHpx35mkX6TufZ01P1JD7WaRfqpwK/CU4+TOAF4BelXp06oJRgzM+uctiWL/AiIPzYUzyM9PZL895C68jMjuRKYIml70q86XRIRtR/guYRnJiAzM2uzTl+z2C7/RgKkX5LaLg9PZ/1fL1uZy5qVP4OkYyQtlbR0zZo1oxu1mdkE17UL3JEeSjVqD6aKiEURMTsiZk+bNug/IJqZ2Qh1+j+475W0fUSszqeZ7svlq1j/Z0Fn5LJVpN+jrS+/ot1B9i+44Knh5QsPaPfqzMx6Xqd7FkuA2h1NR5B+Na5Wfni+K2oPYF0+XXUxsE/+Sc+tgX1ymZmZdVDbehaSziL1CqZKWkm6q2khcI6k+aSfCqz9XOGFwFxggPTbtkcBRMQfJX0G+E2u9+mIaLxobmZmbda2ZBERb20yac4gdQM4tslyTgdOH8XQzMxsmPwf3GZmVuRkYWZmRU4WZmZW5GRhZmZFThZmZlbkZGFmZkVOFmZmVuRkYWZmRU4WZmZW5GRhZmZFThZmZlbkZGFmZkVOFmZmVuRkYWZmRU4WZmZW5GRhZmZFThZmZlbkZGFmZkVOFmZmVuRkYWZmRU4WZmZW5GRhZmZFThZmZlbkZGFmZkVOFmZmVuRkYWZmRU4WZmZW5GRhZmZFThZmZlbkZGFmZkVOFmZmVuRkYWZmRU4WZmZW1JVkIekfJN0k6UZJZ0naRNJOkq6SNCDpbEkb5bob5/GBPL2/GzGbmU1kHU8WkqYDHwRmR8QLgQ2Bw4DPAydHxC7AWmB+nmU+sDaXn5zrmZlZB3XrNFQfsKmkPmAzYDXweuDcPH0xcEgenpfHydPnSFLnQjUzs44ni4hYBXwBuIuUJNYB1wAPRMQTudpKYHoeng6syPM+ketv27hcScdIWipp6Zo1a9rbCDOzCaYbp6G2JvUWdgKeDWwO7NfqciNiUUTMjojZ06ZNa3VxZmZWpxunofYGfh8RayLiL8APgFcDU/JpKYAZwKo8vAqYCZCnbwXc39mQzcwmtm4ki7uAPSRtlq89zAGWAZcDb8p1jgDOz8NL8jh5+mURER2M18xswuvGNYurSBeqrwVuyDEsAo4HjpM0QLomcVqe5TRg21x+HLCg0zGbmU10feUqoy8iTgBOaCi+A3j5IHX/DBzaibjMzGxw/g9uMzMrcrIwM7MiJwszMytysjAzsyInCzMzK3KyMDOzIicLMzMrcrIwM7MiJwszMytysjAzsyInCzMzK3KyMDOzIicLMzMrcrIwM7MiJwszMytysjAzsyInCzMzK3KyMDOzIicLMzMrcrIwM7MiJwszMytysjAzsyInCzMzK3KyMDOzIicLMzMrcrIwM7MiJwszMytysjAzsyInCzMzK6qULCS9qN2BmJlZ76ras/iKpKslvV/SVm2NyMzMek6lZBERrwXeDswErpH0H5Le0NbIzMysZ1S+ZhERtwGfBI4H9gROkXSLpP8z3JVKmiLp3Dz/zZJeKWkbSZdIui3/3TrXlaRTJA1Iul7S7sNdn5mZtabqNYv/Jelk4Gbg9cBBEfH8PHzyCNb7ZeDHEfE84MV5uQuASyNiFnBpHgfYH5iVX8cAXx3B+szMrAVVexb/BlwLvDgijo2IawEi4m5Sb6OyfM3jdcBpeRmPR8QDwDxgca62GDgkD88DzozkSmCKpO2Hs04zM2tNX8V6BwB/ioi/AkjaANgkIh6NiG8Nc507AWuAf5f0YuAa4EPAdhGxOte5B9guD08HVtTNvzK
XrcbMzDqias/ip8CmdeOb5bKR6AN2B74aES8BHuHpU04AREQAMZyFSjpG0lJJS9esWTPC0MzMbDBVk8UmEfFwbSQPbzbCda4EVkbEVXn8XFLyuLd2ein/vS9PX0W6C6tmRi5bT0QsiojZETF72rRpIwzNzMwGUzVZPFJ/F5KklwJ/GskKI+IeYIWk5+aiOcAyYAlwRC47Ajg/Dy8BDs93Re0BrKs7XWVmZh1Q9ZrFh4HvSbobEPA3wFtaWO/fA9+RtBFwB3AUKXGdI2k+cCfw5lz3QmAuMAA8muuamVkHVUoWEfEbSc8Dar2BWyPiLyNdaURcB8weZNKcQeoGcOxI12VmZq2r2rMAeBnQn+fZXRIRcWZbojIzs55SKVlI+hbwHOA64K+5OAAnCzOzCaBqz2I2sFs+JWRmZhNM1buhbiRd1DYzswmoas9iKrBM0tXAY7XCiDi4LVGZmVlPqZosTmxnEGZm1tuq3jr7X5J2BGZFxE8lbQZs2N7QzMysV1R9RPnRpMdyfD0XTQfOa1NMZmbWY6pe4D4WeDXwIDz1Q0jPaldQZmbWW6omi8ci4vHaiKQ+hvlUWDMzG7uqJov/kvQJYNP829vfA/6zfWGZmVkvqZosFpB+sOgG4D2kh/sN6xfyzMxs7Kp6N9STwDfyy8zMJpiqz4b6PYNco4iInUc9IjMz6znDeTZUzSbAocA2ox+OmZn1okrXLCLi/rrXqoj4EnBAe0MzM7NeUfU01O51oxuQehrD+S0MMzMbw6oe8L9YN/wEsJynf/bUzMzGuap3Q/1tuwMxM7PeVfU01HFDTY+Ik0YnHDMz60XDuRvqZcCSPH4QcDVwWzuCMjOz3lI1WcwAdo+IhwAknQhcEBHvaFdgZmbWO6o+7mM74PG68cdzmZmZTQBVexZnAldL+mEePwRY3JaIzMys51S9G+qfJF0EvDYXHRURv21fWGZm1kuqnoYC2Ax4MCK+DKyUtFObYjIzsx5T9WdVTwCOBz6eiyYB325XUGZm1luq9iz+DjgYeAQgIu4GtmxXUGZm1luqJovHIyLIjymXtHn7QjIzs15TNVmcI+nrwBRJRwM/xT+EZGY2YRTvhpIk4GzgecCDwHOBT0XEJW2OzczMekQxWURESLowIl4EOEGYmU1AVU9DXSvpZW2NxMzMelbVZPEK4EpJt0u6XtINkq5vZcWSNpT0W0k/yuM7SbpK0oCksyVtlMs3zuMDeXp/K+s1M7PhGzJZSNohD+4L7Ay8nvTE2QPz31Z8CLi5bvzzwMkRsQuwFpify+cDa3P5ybmemZl1UKlncR5ARNwJnBQRd9a/RrpSSTNIv+H9zTwuUiI6N1dZTHr+FMA8nn4O1bnAnFzfzMw6pJQs6g/KO4/ier8EfAx4Mo9vCzwQEU/k8ZXA9Dw8HVgBkKevy/XXD1Q6RtJSSUvXrFkziqGamVkpWUST4RGTdCBwX0RcMxrLq4mIRRExOyJmT5s2bTQXbWY24ZVunX2xpAdJPYxN8zB5PCJi8gjW+WrgYElzgU2AycCXSf/w15d7DzOAVbn+KmAm6eGFfcBWwP0jWK+ZmY3QkD2LiNgwIiZHxJYR0ZeHa+MjSRRExMcjYkZE9AOHAZdFxNuBy4E35WpHAOfn4SV5nDz9svzoETMz65DhPKK83Y4HjpM0QLomcVouPw3YNpcfByzoUnxmZhNW1V/Ka4uIuAK4Ig/fAbx8kDp/Bg7taGBmZraeXupZmJlZj3KyMDOzIicLMzMrcrIwM7MiJwszMytysjAzsyInCzMzK3KyMDOzIicLMzMrcrIwM7MiJwszMytysjAzsyInCzMzK3KyMDOzIicLMzMrcrIwM7MiJwszMytysjAzsyInCzMzK3KyMDOzIicLMzMrcrIwM7MiJwszMytysjAzsyInCzMzK3KyMDOzIicLMzMrcrIwM7MiJwszMytysjAzsyInCzMzK3KyMDOzIicLMzMr6niykDRT0uWSlkm6SdKHcvk
2ki6RdFv+u3Uul6RTJA1Iul7S7p2O2cxsoutGz+IJ4CMRsRuwB3CspN2ABcClETELuDSPA+wPzMqvY4Cvdj5kM7OJrePJIiJWR8S1efgh4GZgOjAPWJyrLQYOycPzgDMjuRKYImn7zkZtZjaxdfWahaR+4CXAVcB2EbE6T7oH2C4PTwdW1M22Mpc1LusYSUslLV2zZk37gjYzm4C6liwkbQF8H/hwRDxYPy0iAojhLC8iFkXE7IiYPW3atFGM1MzMupIsJE0iJYrvRMQPcvG9tdNL+e99uXwVMLNu9hm5zMzMOqSv0yuUJOA04OaIOKlu0hLgCGBh/nt+XfkHJH0XeAWwru50lZnZhNe/4IKnhpcvPKAt6+h4sgBeDbwTuEHSdbnsE6QkcY6k+cCdwJvztAuBucAA8ChwVEejNTOzzieLiPgFoCaT5wxSP4Bj2xqUmZkNyf/BbWZmRU4WZmZW5GRhZmZFThZmZlbkZGFmZkVOFmZmVuRkYWZmRU4WZmZW5GRhZmZFThZmZlbkZGFmZkVOFmZmVuRkYWZmRU4WZmZW5GRhZmZFThZmZlbkZGFmZkVOFmZmVuRkYWZmRU4WZmZW1NftAHpd/4ILnhpevvCALkZiZtY97lmYmVmRk4WZmRX5NNQw1J+SqufTUzYcPrVpY5F7FmZmVuRkYWZmRU4WZmZW5GRhZmZFvsA9CnzBsr28fc26z8nCWuaDudn452QxypodOKscUEfroOuDd5m3kdnwOFm0UbP/y2hW3qyOk4v1kk7vL94/e4OTxRgw3ORiZq1zklrfmEkWkvYDvgxsCHwzIhZ2OaRxqTHpVOnVVFlWtz5sw42hmzG3uzfZC+9Hp1V56kKv7yO98kVwTCQLSRsCpwJvAFYCv5G0JCKWdTey3jeaO1orPZwq13LqDfdD24qRLKeVa1OtLL+XYxjKaMXX7jZXWe9w4ylNG6xOL1JEdDuGIkmvBE6MiH3z+McBIuJzg9WfPXt2LF26dMTr65VMbmY2XK0kHUnXRMTswaaNiZ4FMB1YUTe+EnhFfQVJxwDH5NGHJd3awvqmAn9oYf6xZqK1F9zmiWLCtVmfb6nNOzabMFaSRVFELAIWjcayJC1tll3Ho4nWXnCbJwq3efSMlcd9rAJm1o3PyGVmZtYBYyVZ/AaYJWknSRsBhwFLuhyTmdmEMSZOQ0XEE5I+AFxMunX29Ii4qY2rHJXTWWPIRGsvuM0Thds8SsbE3VBmZtZdY+U0lJmZdZGThZmZFU3YZCFpP0m3ShqQtGCQ6RtLOjtPv0pSfxfCHFUV2nycpGWSrpd0qaSm91yPFaU219V7o6SQNOZvs6zSZklvzu/1TZL+o9MxjrYK+/YOki6X9Nu8f8/tRpyjRdLpku6TdGOT6ZJ0St4e10vaveWVRsSEe5Eukt8O7AxsBPwO2K2hzvuBr+Xhw4Czux13B9r8t8Bmefh9E6HNud6WwM+AK4HZ3Y67A+/zLOC3wNZ5/FndjrsDbV4EvC8P7wYs73bcLbb5dcDuwI1Nps8FLgIE7AFc1eo6J2rP4uXAQETcERGPA98F5jXUmQcszsPnAnMkqYMxjrZimyPi8oh4NI9eSfp/lrGsyvsM8Bng88CfOxlcm1Rp89HAqRGxFiAi7utwjKOtSpsDmJyHtwLu7mB8oy4ifgb8cYgq84AzI7kSmCJp+1bWOVGTxWCPD5nerE5EPAGsA7btSHTtUaXN9eaTvpmMZcU25+75zIgYLw8Eq/I+7wrsKumXkq7MT3Qey6q0+UTgHZJWAhcCf9+Z0LpmuJ/3ojHxfxbWWZLeAcwG9ux2LO0kaQPgJODILofSaX2kU1F7kXqPP5P0ooh4oJtBtdlbgTMi4ov5waTfkvTCiHiy24GNFRO1Z1Hl8SFP1ZHUR+q63t+R6Nqj0iNTJO0N/D/g4Ih4rEOxtUupzVsCLwSukLScdG53yRi/yF3lfV4JLImIv0TE74H/JiWPsapKm+cD5wBExK+BTUg
PGRyvRv0RSRM1WVR5fMgS4Ig8/CbgsshXjsaoYpslvQT4OilRjPXz2FBoc0Ssi4ipEdEfEf2k6zQHR8TIn2/ffVX27fNIvQokTSWdlrqjgzGOtiptvguYAyDp+aRksaajUXbWEuDwfFfUHsC6iFjdygIn5GmoaPL4EEmfBpZGxBLgNFJXdYB0Iemw7kXcuopt/ldgC+B7+Vr+XRFxcNeCblHFNo8rFdt8MbCPpGXAX4GPRsSY7TVXbPNHgG9I+gfSxe4jx/KXP0lnkRL+1Hwd5gRgEkBEfI10XWYuMAA8ChzV8jrH8PYyM7MOmainoczMbBicLMzMrMjJwszMipwszMysyMnCzMyKnCzMzKzIycLMzIr+B0LHnZgw+Vk2AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF1['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 1 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 60, "id": "exceptional-dakota", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 1 - Violation Ratios <= 0.5')" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF1[codepConstDF1['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 1 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": null, "id": "interior-joseph", "metadata": {}, "outputs": [], "source": [ "# Compare against the mean computed from the data itself; the ratio lies in [0, 1],\n", "# so the previous hard-coded threshold (3.539484) could never be reached.\n", "violation_ratios_v1 = codepConstDF1['violation_ratio']\n", "mean_ratio_v1 = violation_ratios_v1.mean()\n", "n_above_mean_v1 = (violation_ratios_v1 > mean_ratio_v1).sum()\n", "print(f\"No. of properties whose violation ratio is greater than mean: {n_above_mean_v1}/{len(codepConstDF1)}\")" ] }, { "cell_type": "markdown", "id": "greater-genetics", "metadata": {}, "source": [ "#### Version 2 - Mand Normal" ] }, { "cell_type": "code", "execution_count": 62, "id": "constant-chance", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF2 = pd.DataFrame(codepConstViolations['Mand_Normal']).T" ] }, { "cell_type": "code", "execution_count": 63, "id": "included-adjustment", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P10064024403[../../allConstraintsAnalysisWRemoved/codepend...
P58782340[../../allConstraintsAnalysisWRemoved/codepend...
P552710[../../allConstraintsAnalysisWRemoved/codepend...
P1949192[../../allConstraintsAnalysisWRemoved/codepend...
P2458583[../../allConstraintsAnalysisWRemoved/codepend...
............
P1935149949[../../allConstraintsAnalysisWRemoved/codepend...
P19371994[../../allConstraintsAnalysisWRemoved/codepend...
P19382540[../../allConstraintsAnalysisWRemoved/codepend...
P19399071[../../allConstraintsAnalysisWRemoved/codepend...
P194070[../../allConstraintsAnalysisWRemoved/codepend...
\n", "

2419 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P1006 4024 403 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P5878 234 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P5527 1 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1949 19 2 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P2458 58 3 [../../allConstraintsAnalysisWRemoved/codepend...\n", "... ... ... ...\n", "P1935 1499 49 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1937 199 4 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1938 254 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1939 907 1 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1940 7 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "\n", "[2419 rows x 3 columns]" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2" ] }, { "cell_type": "code", "execution_count": 64, "id": "fundamental-knowing", "metadata": {}, "outputs": [], "source": [ "# violation_ratio = incorrect / (correct + incorrect), computed column-wise on floats.\n", "# A property with no checked statements (correct + incorrect == 0) yields NaN here,\n", "# whereas the row-wise division could raise ZeroDivisionError on plain Python ints.\n", "codepConstDF2['violation_ratio'] = (\n", "    codepConstDF2['incorrect'].astype(float)\n", "    / (codepConstDF2['correct'] + codepConstDF2['incorrect']).astype(float)\n", ")" ] }, { "cell_type": "code", "execution_count": 65, "id": "harmful-discipline", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P466801[../../allConstraintsAnalysisWRemoved/codepend...1.0
P618902[../../allConstraintsAnalysisWRemoved/codepend...1.0
P258601[../../allConstraintsAnalysisWRemoved/codepend...1.0
P379601[../../allConstraintsAnalysisWRemoved/codepend...1.0
P286304[../../allConstraintsAnalysisWRemoved/codepend...1.0
P888801[../../allConstraintsAnalysisWRemoved/codepend...1.0
P472701[../../allConstraintsAnalysisWRemoved/codepend...1.0
P578301[../../allConstraintsAnalysisWRemoved/codepend...1.0
P2489012[../../allConstraintsAnalysisWRemoved/codepend...1.0
P2309055[../../allConstraintsAnalysisWRemoved/codepend...1.0
P664502[../../allConstraintsAnalysisWRemoved/codepend...1.0
P231004[../../allConstraintsAnalysisWRemoved/codepend...1.0
P788201[../../allConstraintsAnalysisWRemoved/codepend...1.0
P876901[../../allConstraintsAnalysisWRemoved/codepend...1.0
P3692096[../../allConstraintsAnalysisWRemoved/codepend...1.0
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P4668 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P6189 0 2 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2586 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P3796 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2863 0 4 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P8888 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P4727 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P5783 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2489 0 12 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2309 0 55 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P6645 0 2 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2310 0 4 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P7882 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P8769 0 1 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P3692 0 96 [../../allConstraintsAnalysisWRemoved/codepend... \n", "\n", " violation_ratio \n", "P4668 1.0 \n", "P6189 1.0 \n", "P2586 1.0 \n", "P3796 1.0 \n", "P2863 1.0 \n", "P8888 1.0 \n", "P4727 1.0 \n", "P5783 1.0 \n", "P2489 1.0 \n", "P2309 1.0 \n", "P6645 1.0 \n", "P2310 1.0 \n", "P7882 1.0 \n", "P8769 1.0 \n", "P3692 1.0 " ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 66, "id": "unlikely-chamber", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2347232997624[../../allConstraintsAnalysisWRemoved/codepend...0.992803
P22142989348721476[../../allConstraintsAnalysisWRemoved/codepend...0.194425
P43331113458436934[../../allConstraintsAnalysisWRemoved/codepend...0.013849
P2757295163856[../../allConstraintsAnalysisWRemoved/codepend...0.957377
P2860174842113152425[../../allConstraintsAnalysisWRemoved/codepend...0.000871
P1435212163792401[../../allConstraintsAnalysisWRemoved/codepend...0.041734
P79022038760838[../../allConstraintsAnalysisWRemoved/codepend...0.749006
P7084571857276[../../allConstraintsAnalysisWRemoved/codepend...0.556110
P19712201852917[../../allConstraintsAnalysisWRemoved/codepend...0.302495
P15984060049329[../../allConstraintsAnalysisWRemoved/codepend...0.548533
P1111146803[../../allConstraintsAnalysisWRemoved/codepend...0.999979
P856145194943599[../../allConstraintsAnalysisWRemoved/codepend...0.029153
P2248402541608[../../allConstraintsAnalysisWRemoved/codepend...0.911796
P2325407940619[../../allConstraintsAnalysisWRemoved/codepend...0.908743
P2243403536870[../../allConstraintsAnalysisWRemoved/codepend...0.901357
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P234 7232 997624 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2214 2989348 721476 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P433 31113458 436934 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P275 7295 163856 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2860 174842113 152425 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1435 2121637 92401 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P7902 20387 60838 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P708 45718 57276 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P197 122018 52917 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1598 40600 49329 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1111 1 46803 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P856 1451949 43599 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2248 4025 41608 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2325 4079 40619 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2243 4035 36870 [../../allConstraintsAnalysisWRemoved/codepend... 
\n", "\n", " violation_ratio \n", "P234 0.992803 \n", "P2214 0.194425 \n", "P433 0.013849 \n", "P275 0.957377 \n", "P2860 0.000871 \n", "P1435 0.041734 \n", "P7902 0.749006 \n", "P708 0.556110 \n", "P197 0.302495 \n", "P1598 0.548533 \n", "P1111 0.999979 \n", "P856 0.029153 \n", "P2248 0.911796 \n", "P2325 0.908743 \n", "P2243 0.901357 " ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 67, "id": "violent-match", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 2419.000000\n", "mean 0.244716\n", "std 0.335376\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.041734\n", "75% 0.420357\n", "max 1.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF2['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 68, "id": "educational-thickness", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 2 - Violation Ratios')" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF2['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 2 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 69, "id": "latin-mitchell", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 2 - Violation Ratios <= 0.5')" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF2[codepConstDF2['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 2 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": null, "id": "asian-forwarding", "metadata": {}, "outputs": [], "source": [ "# Use the mean of the column itself as the cutoff. The previously hard-coded\n", "# value (2.290915) lies above this column's maximum of 1.0, so the count was always 0.\n", "print(f\"No. of properties whose violation ratio is greater than mean: {(codepConstDF2['violation_ratio'] > codepConstDF2['violation_ratio'].mean()).sum()}/{len(codepConstDF2)}\")" ] }, { "cell_type": "markdown", "id": "destroyed-flash", "metadata": {}, "source": [ "#### Version 3 - Mand" ] }, { "cell_type": "code", "execution_count": 71, "id": "consecutive-plenty", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF3 = pd.DataFrame(codepConstViolations['Mand']).T" ] }, { "cell_type": "code", "execution_count": 72, "id": "digital-mileage", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P7213234913[../../allConstraintsAnalysisWRemoved/codepend...
P4019110[../../allConstraintsAnalysisWRemoved/codepend...
P367210[../../allConstraintsAnalysisWRemoved/codepend...
P1808532[../../allConstraintsAnalysisWRemoved/codepend...
P2948172[../../allConstraintsAnalysisWRemoved/codepend...
............
P17701693[../../allConstraintsAnalysisWRemoved/codepend...
P178720951[../../allConstraintsAnalysisWRemoved/codepend...
P2946341181[../../allConstraintsAnalysisWRemoved/codepend...
P1799231[../../allConstraintsAnalysisWRemoved/codepend...
P1800206[../../allConstraintsAnalysisWRemoved/codepend...
\n", "

1089 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P721 32349 13 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P4019 11 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P3672 1 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1808 53 2 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P2948 17 2 [../../allConstraintsAnalysisWRemoved/codepend...\n", "... ... ... ...\n", "P1770 169 3 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1787 2095 1 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P2946 341 181 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1799 23 1 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P1800 20 6 [../../allConstraintsAnalysisWRemoved/codepend...\n", "\n", "[1089 rows x 3 columns]" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3" ] }, { "cell_type": "code", "execution_count": 73, "id": "formed-battle", "metadata": {}, "outputs": [], "source": [ "codepConstDF3['violation_ratio'] = codepConstDF3.apply(lambda p: p.incorrect / p.correct if p.correct != 0 else p.incorrect/100, axis=1)" ] }, { "cell_type": "code", "execution_count": 74, "id": "numerous-construction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P3823557348[../../allConstraintsAnalysisWRemoved/codepend...133.600000
P5051279[../../allConstraintsAnalysisWRemoved/codepend...39.500000
P8613243[../../allConstraintsAnalysisWRemoved/codepend...21.500000
P2191798[../../allConstraintsAnalysisWRemoved/codepend...5.764706
P243115[../../allConstraintsAnalysisWRemoved/codepend...5.000000
P248215[../../allConstraintsAnalysisWRemoved/codepend...5.000000
P2685943[../../allConstraintsAnalysisWRemoved/codepend...4.777778
P322014[../../allConstraintsAnalysisWRemoved/codepend...4.000000
P406914[../../allConstraintsAnalysisWRemoved/codepend...4.000000
P19591039[../../allConstraintsAnalysisWRemoved/codepend...3.900000
P243213[../../allConstraintsAnalysisWRemoved/codepend...3.000000
P79022038760838[../../allConstraintsAnalysisWRemoved/codepend...2.984157
P3421615[../../allConstraintsAnalysisWRemoved/codepend...2.500000
P7783891[../../allConstraintsAnalysisWRemoved/codepend...2.394737
P22523069[../../allConstraintsAnalysisWRemoved/codepend...2.300000
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P3823 55 7348 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P5051 2 79 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P8613 2 43 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P219 17 98 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2431 1 5 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2482 1 5 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2685 9 43 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P3220 1 4 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P4069 1 4 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1959 10 39 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2432 1 3 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P7902 20387 60838 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P3421 6 15 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P778 38 91 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2252 30 69 [../../allConstraintsAnalysisWRemoved/codepend... 
\n", "\n", " violation_ratio \n", "P3823 133.600000 \n", "P5051 39.500000 \n", "P8613 21.500000 \n", "P219 5.764706 \n", "P2431 5.000000 \n", "P2482 5.000000 \n", "P2685 4.777778 \n", "P3220 4.000000 \n", "P4069 4.000000 \n", "P1959 3.900000 \n", "P2432 3.000000 \n", "P7902 2.984157 \n", "P3421 2.500000 \n", "P778 2.394737 \n", "P2252 2.300000 " ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 75, "id": "identified-marble", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "correct 2935\n", "incorrect 79\n", "paths [../../allConstraintsAnalysisWRemoved/codepend...\n", "violation_ratio 0.026917\n", "Name: P1713, dtype: object" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3.loc['P1713']" ] }, { "cell_type": "code", "execution_count": 76, "id": "established-mounting", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "Q4681882-P1713-d878eb-9fff460e-0\tQ4681882\tP1713\t\"https://heightnetworth.com/adele-givens-net-worth-2020/\"\tnormal\turl\r\n", "Q1763529-P1713-c190308f-q1763529$ffe0aa48-c924-4913-ad50-343dc2d666f1-0\tQ1763529\tP1713\thttp://www.bundestag.de/bundestag/abgeordnete18/biografien/H/heck_stefan/258446\t\t\r\n", "Q2571287-P1713-6ac225c1-q2571287$bb1e6912-f58f-4106-be9c-55ad68bde759-0\tQ2571287\tP1713\thttp://www.bundestag.de/bundestag/abgeordnete18/biografien/L/lorenz_wilfried/258666\t\t\r\n", "Q41019201-P1713-16045f49-q41019201$f550afb5-9a1e-4e19-b3cf-7cacc61a2e9e-0\tQ41019201\tP1713\thttps://www.bundestag.de/abgeordnete/biografien/P/-/522580\t\t\r\n", "Q41105197-P1713-5ed2f31e-q41105197$d493bc2d-023b-4e2f-babc-221a3e720054-0\tQ41105197\tP1713\thttps://www.bundestag.de/abgeordnete/biografien/S/-/523620\t\t\r\n", 
"Q41160336-P1713-72b7c975-q41160336$7b946af4-7cb4-4dc8-8a98-6534fb729a4a-0\tQ41160336\tP1713\thttps://www.bundestag.de/abgeordnete/biografien/C/-/518870\t\t\r\n", "Q41356018-P1713-02640366-q41356018$5e59f8fa-cb0a-4abe-9a90-ad9e82a474c4-0\tQ41356018\tP1713\thttps://www.bundestag.de/abgeordnete/biografien/O/-/522500\t\t\r\n", "Q41435421-P1713-7c3cb808-q41435421$38660da2-d639-43fc-9adf-614771a29db9-0\tQ41435421\tP1713\thttps://www.bundestag.de/abgeordnete/biografien/M/muenz_volker/522234\t\t\r\n", "Q29946731-P1713-c997a778-q29946731$5e3c92ab-a298-4fb6-a49c-8e57816275c4-0\tQ29946731\tP1713\thttps://www.bundestag.de/abgeordnete/biografien/R/-/522880\t\t\r\n" ] } ], "source": [ "!head ../../allConstraintsAnalysisWRemoved/codependencyConstraint_Final/Mand/claims.P1713.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": null, "id": "naval-functionality", "metadata": {}, "outputs": [], "source": [ "!cat ../../allConstraintsAnalysisWRemoved/codependencyConstraint_Final/Mand/claims.P1713.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 78, "id": "imposed-bibliography", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P79022038760838[../../allConstraintsAnalysisWRemoved/codepend...2.984157
P3823557348[../../allConstraintsAnalysisWRemoved/codepend...133.600000
P1411312323270[../../allConstraintsAnalysisWRemoved/codepend...0.024918
P42826551223[../../allConstraintsAnalysisWRemoved/codepend...0.460640
P108737746351210[../../allConstraintsAnalysisWRemoved/codepend...0.000321
P3647407510[../../allConstraintsAnalysisWRemoved/codepend...1.253071
P7959657006322[../../allConstraintsAnalysisWRemoved/codepend...0.000490
P3802151296[../../allConstraintsAnalysisWRemoved/codepend...0.137610
P39635997281[../../allConstraintsAnalysisWRemoved/codepend...0.046857
P3371903183[../../allConstraintsAnalysisWRemoved/codepend...0.202658
P2946341181[../../allConstraintsAnalysisWRemoved/codepend...0.530792
P3595375171[../../allConstraintsAnalysisWRemoved/codepend...0.031814
P3318278167[../../allConstraintsAnalysisWRemoved/codepend...0.600719
P8425241165[../../allConstraintsAnalysisWRemoved/codepend...0.684647
P19541377164[../../allConstraintsAnalysisWRemoved/codepend...0.119099
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P7902 20387 60838 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P3823 55 7348 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P141 131232 3270 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P428 2655 1223 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1087 3774635 1210 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P3647 407 510 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P7959 657006 322 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P380 2151 296 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P3963 5997 281 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P3371 903 183 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2946 341 181 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P359 5375 171 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P3318 278 167 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P8425 241 165 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1954 1377 164 [../../allConstraintsAnalysisWRemoved/codepend... 
\n", "\n", " violation_ratio \n", "P7902 2.984157 \n", "P3823 133.600000 \n", "P141 0.024918 \n", "P428 0.460640 \n", "P1087 0.000321 \n", "P3647 1.253071 \n", "P7959 0.000490 \n", "P380 0.137610 \n", "P3963 0.046857 \n", "P3371 0.202658 \n", "P2946 0.530792 \n", "P359 0.031814 \n", "P3318 0.600719 \n", "P8425 0.684647 \n", "P1954 0.119099 " ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 79, "id": "emotional-crown", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 1089.000000\n", "mean 0.298588\n", "std 4.291846\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.000000\n", "75% 0.018405\n", "max 133.600000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF3['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 80, "id": "certain-freeze", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 3 - Violation Ratios')" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF3['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 3 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 81, "id": "cooperative-ownership", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 3 - Violation Ratios <= 0.0005')" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF3[codepConstDF3['violation_ratio'] <= 0.0005].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 3 - Violation Ratios <= 0.0005\")" ] }, { "cell_type": "code", "execution_count": null, "id": "studied-inclusion", "metadata": {}, "outputs": [], "source": [ "# Use the mean of the column itself as the cutoff. The previously hard-coded\n", "# value (0.922928) does not match the mean that describe() reports for this column.\n", "print(f\"No. of properties whose violation ratio is greater than mean: {(codepConstDF3['violation_ratio'] > codepConstDF3['violation_ratio'].mean()).sum()}/{len(codepConstDF3)}\")" ] }, { "cell_type": "markdown", "id": "protective-brazil", "metadata": {}, "source": [ "#### Version 4 - Normal" ] }, { "cell_type": "code", "execution_count": 83, "id": "laughing-pressing", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "codepConstDF4 = pd.DataFrame(codepConstViolations['Normal']).T" ] }, { "cell_type": "code", "execution_count": 84, "id": "loving-swift", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpaths
P1404131[../../allConstraintsAnalysisWRemoved/codepend...
P210036497[../../allConstraintsAnalysisWRemoved/codepend...
P386271[../../allConstraintsAnalysisWRemoved/codepend...
P6769310[../../allConstraintsAnalysisWRemoved/codepend...
P447420[../../allConstraintsAnalysisWRemoved/codepend...
............
P81743671[../../allConstraintsAnalysisWRemoved/codepend...
P277812[../../allConstraintsAnalysisWRemoved/codepend...
P541831[../../allConstraintsAnalysisWRemoved/codepend...
P2783260[../../allConstraintsAnalysisWRemoved/codepend...
P817710[../../allConstraintsAnalysisWRemoved/codepend...
\n", "

1925 rows × 3 columns

\n", "
" ], "text/plain": [ " correct incorrect paths\n", "P1404 13 1 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P2100 364 97 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P3862 7 1 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P6769 31 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P4474 2 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "... ... ... ...\n", "P8174 367 1 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P2778 1 2 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P5418 3 1 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P2783 26 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "P8177 1 0 [../../allConstraintsAnalysisWRemoved/codepend...\n", "\n", "[1925 rows x 3 columns]" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4" ] }, { "cell_type": "code", "execution_count": 85, "id": "north-christian", "metadata": {}, "outputs": [], "source": [ "codepConstDF4['violation_ratio'] = codepConstDF4.apply(lambda p: p.incorrect / p.correct if p.correct != 0 else p.incorrect/100, axis=1)" ] }, { "cell_type": "code", "execution_count": 86, "id": "closing-causing", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P1111146803[../../allConstraintsAnalysisWRemoved/codepend...46803.000000
P37432542[../../allConstraintsAnalysisWRemoved/codepend...847.333333
P111683199[../../allConstraintsAnalysisWRemoved/codepend...399.875000
P76951851[../../allConstraintsAnalysisWRemoved/codepend...370.200000
P26434817084[../../allConstraintsAnalysisWRemoved/codepend...355.916667
P1283133871[../../allConstraintsAnalysisWRemoved/codepend...297.769231
P275812318919[../../allConstraintsAnalysisWRemoved/codepend...153.813008
P26372295[../../allConstraintsAnalysisWRemoved/codepend...147.500000
P2347232997624[../../allConstraintsAnalysisWRemoved/codepend...137.945796
P19958911360[../../allConstraintsAnalysisWRemoved/codepend...127.640449
P31563365[../../allConstraintsAnalysisWRemoved/codepend...121.666667
P5251100[../../allConstraintsAnalysisWRemoved/codepend...100.000000
P852464505[../../allConstraintsAnalysisWRemoved/codepend...97.934783
P5971057210[../../allConstraintsAnalysisWRemoved/codepend...68.666667
P5627449[../../allConstraintsAnalysisWRemoved/codepend...64.142857
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1111 1 46803 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P374 3 2542 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1116 8 3199 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P769 5 1851 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2643 48 17084 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1283 13 3871 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2758 123 18919 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2637 2 295 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P234 7232 997624 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1995 89 11360 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P3156 3 365 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P525 1 100 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P852 46 4505 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P597 105 7210 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P562 7 449 [../../allConstraintsAnalysisWRemoved/codepend... \n", "\n", " violation_ratio \n", "P1111 46803.000000 \n", "P374 847.333333 \n", "P1116 399.875000 \n", "P769 370.200000 \n", "P2643 355.916667 \n", "P1283 297.769231 \n", "P2758 153.813008 \n", "P2637 147.500000 \n", "P234 137.945796 \n", "P1995 127.640449 \n", "P3156 121.666667 \n", "P525 100.000000 \n", "P852 97.934783 \n", "P597 68.666667 \n", "P562 64.142857 " ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 87, "id": "weighted-input", "metadata": {}, "outputs": [], "source": [ "# list(codepConstDF4.sort_values(by=['violation_ratio'],ascending=False).head(5).paths)" ] }, { "cell_type": "code", "execution_count": 88, "id": "brief-effect", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P2347232997624[../../allConstraintsAnalysisWRemoved/codepend...137.945796
P22142989348721476[../../allConstraintsAnalysisWRemoved/codepend...0.241349
P43331113458436934[../../allConstraintsAnalysisWRemoved/codepend...0.014043
P2757295163856[../../allConstraintsAnalysisWRemoved/codepend...22.461412
P2860174842113152425[../../allConstraintsAnalysisWRemoved/codepend...0.000872
P1435212163792401[../../allConstraintsAnalysisWRemoved/codepend...0.043552
P7084571857276[../../allConstraintsAnalysisWRemoved/codepend...1.252811
P19712201852917[../../allConstraintsAnalysisWRemoved/codepend...0.433682
P15984060049329[../../allConstraintsAnalysisWRemoved/codepend...1.215000
P1111146803[../../allConstraintsAnalysisWRemoved/codepend...46803.000000
P856145194943599[../../allConstraintsAnalysisWRemoved/codepend...0.030028
P2248402541608[../../allConstraintsAnalysisWRemoved/codepend...10.337391
P2325407940619[../../allConstraintsAnalysisWRemoved/codepend...9.958078
P2243403536870[../../allConstraintsAnalysisWRemoved/codepend...9.137546
P2244403636801[../../allConstraintsAnalysisWRemoved/codepend...9.118186
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P234 7232 997624 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2214 2989348 721476 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P433 31113458 436934 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P275 7295 163856 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2860 174842113 152425 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1435 2121637 92401 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P708 45718 57276 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P197 122018 52917 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1598 40600 49329 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P1111 1 46803 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P856 1451949 43599 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2248 4025 41608 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2325 4079 40619 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2243 4035 36870 [../../allConstraintsAnalysisWRemoved/codepend... \n", "P2244 4036 36801 [../../allConstraintsAnalysisWRemoved/codepend... 
\n", "\n", " violation_ratio \n", "P234 137.945796 \n", "P2214 0.241349 \n", "P433 0.014043 \n", "P275 22.461412 \n", "P2860 0.000872 \n", "P1435 0.043552 \n", "P708 1.252811 \n", "P197 0.433682 \n", "P1598 1.215000 \n", "P1111 46803.000000 \n", "P856 0.030028 \n", "P2248 10.337391 \n", "P2325 9.958078 \n", "P2243 9.137546 \n", "P2244 9.118186 " ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4.sort_values(by=['incorrect'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 89, "id": "wireless-passenger", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "count 1925.000000\n", "mean 27.339989\n", "std 1067.009120\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.060000\n", "75% 0.580000\n", "max 46803.000000\n", "Name: violation_ratio, dtype: float64" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "codepConstDF4['violation_ratio'].describe()" ] }, { "cell_type": "code", "execution_count": 90, "id": "civilian-arnold", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 4 - Violation Ratios')" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF4['violation_ratio'].plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 4 - Violation Ratios\")" ] }, { "cell_type": "code", "execution_count": 91, "id": "threaded-cooler", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Co-Dependency Constraint - Version 4 - Violation Ratios <= 0.5')" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "codepConstDF4[codepConstDF4['violation_ratio'] <= 0.5].violation_ratio.plot.hist(bins=100).set_title(\"Co-Dependency Constraint - Version 4 - Violation Ratios <= 0.5\")" ] }, { "cell_type": "code", "execution_count": 92, "id": "olympic-charlotte", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of properties whose violation ratio is greater than mean: 206/1925\n" ] } ], "source": [ "print(f\"No. of properties whose violation ratio is greater than mean: {sum(codepConstDF4['violation_ratio'] >= 2.414703)}/{len(codepConstDF4)}\")" ] }, { "cell_type": "markdown", "id": "published-affiliate", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": null, "id": "aggregate-conservative", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "from tqdm.notebook import tqdm\n", "\n", "codepConstViolations = {}\n", "\n", "codepConstViolations = {}\n", "codepConstPropList = set()\n", "\n", "def extractTimes(filename):\n", " times = []\n", " with open(filename) as f:\n", " for line in f:\n", " if \"real\" in line:\n", " line = line.strip()\n", " time1 = line.split(\"\\t\")[1]\n", " mins, sec = time1.split(\"m\")\n", " mins = int(mins)\n", " sec = float(sec[:-1])\n", " times.append(60 * mins + sec)\n", " return times\n", "\n", "# codepConstViolationsSummary = {}\n", "times = []\n", "timesVersion = {\"MSN\": [], \"MN\": [], \"M\": [], \"N\": [], \"S\": []}\n", "filePath = '/data/wd-correctness/propertiesSplitWRemoved/checkViolations/exec_logs/'\n", "for filename in tqdm(os.listdir(filePath)):\n", " if filename.startswith(\"timeLog_codepConst_\"):\n", " ver = filename.split('_')[2]\n", " tempTimes = extractTimes(filePath + filename)\n", " times += tempTimes\n", " timesVersion[ver] += tempTimes\n", "print(pd.Series(times).describe())" ] }, { "cell_type": "code", 
"execution_count": null, "id": "hearing-treasury", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['MSN']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "animal-vocabulary", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['MN']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "gentle-accessory", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['M']).describe())" ] }, { "cell_type": "code", "execution_count": null, "id": "fresh-namibia", "metadata": {}, "outputs": [], "source": [ "print(pd.Series(timesVersion['N']).describe())" ] }, { "cell_type": "markdown", "id": "industrial-parcel", "metadata": {}, "source": [ "## Symmetric Constraint (Q21510862)\n", "\n", "This constraint says, if node1 has a property with this constraint, then both `(node1)-[prop]->(node2)` and `(node2)-[prop]->(node1)` must be present with few exceptions" ] }, { "cell_type": "markdown", "id": "silent-fundamentals", "metadata": {}, "source": [ "### Constraints File" ] }, { "cell_type": "code", "execution_count": 2, "id": "known-wednesday", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-12 01:02:26 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " AND graph_1_c1.\"node2\"=?\r\n", " PARAS: ['P2302', 'Q21510862']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->(:Q21510862)\" \\\n", " -o ../../constraintsOP/symmetricConstraint/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 3, 
"id": "legal-diamond", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "qualiDF = pd.read_csv(\"../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz\",sep='\\t')\n", "constDF = pd.read_csv(\"../../constraintsOP/symmetricConstraint/claims.constraints_list.tsv\",sep='\\t')" ] }, { "cell_type": "code", "execution_count": 4, "id": "exceptional-morris", "metadata": {}, "outputs": [], "source": [ "constDF2 = constDF.set_index('id').join(qualiDF.set_index('node1'),rsuffix='_qualifier').drop(columns=['id', 'node2;wikidatatype_qualifier', 'rank', 'node2', 'label', 'node2;wikidatatype']).rename(columns={'label_qualifier':'label', 'node2_qualifier': 'node2'})\n", "constDF2 = constDF2.reset_index()\n", "constDF2 = constDF2.rename(columns={'index':'id'})\n", "constDF2['label'] = constDF2.label.fillna(\"P2316\")\n", "constDF2['node2'] = constDF2.node2.fillna(\"Normal\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "burning-involvement", "metadata": {}, "outputs": [], "source": [ "constDF2.to_csv(\"../../constraintsOP/symmetricConstraint/claims.constraints_all.tsv\",sep=\"\\t\",index=False)" ] }, { "cell_type": "code", "execution_count": 6, "id": "naval-identification", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/symmetricConstraint/claims.constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 7, "id": "considered-madison", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 8, "id": "alone-cattle", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2316', 'P2303'], dtype=object)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 9, 
"id": "mighty-ordinary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2316 42\n", "P2303 3\n", "Name: label, dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 10, "id": "sensitive-alliance", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 11, "id": "tender-valley", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2316
node1id
P1322P1322-P2302-Q21510862-85dea891-0NaN[Normal]
P1327P1327-P2302-Q21510862-a3c3a094-0NaN[Normal]
P1382P1382-P2302-Q21510862-f6bcfecf-0NaN[Normal]
P1560P1560-P2302-Q21510862-fabecaeb-0NaN[Q21502408]
P1639P1639-P2302-Q21510862-384edcd4-0NaN[Q21502408]
\n", "
" ], "text/plain": [ "label P2303 P2316\n", "node1 id \n", "P1322 P1322-P2302-Q21510862-85dea891-0 NaN [Normal]\n", "P1327 P1327-P2302-Q21510862-a3c3a094-0 NaN [Normal]\n", "P1382 P1382-P2302-Q21510862-f6bcfecf-0 NaN [Normal]\n", "P1560 P1560-P2302-Q21510862-fabecaeb-0 NaN [Q21502408]\n", "P1639 P1639-P2302-Q21510862-384edcd4-0 NaN [Q21502408]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 12, "id": "cellular-canal", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 13, "id": "desperate-poster", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2316
node1
P1322NaN[Normal]
P1327NaN[Normal]
P1382NaN[Normal]
P1560NaN[Q21502408]
P1639NaN[Q21502408]
\n", "
" ], "text/plain": [ "label P2303 P2316\n", "node1 \n", "P1322 NaN [Normal]\n", "P1327 NaN [Normal]\n", "P1382 NaN [Normal]\n", "P1560 NaN [Q21502408]\n", "P1639 NaN [Q21502408]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "primary-netherlands", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 14, "id": "pointed-haven", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d32afc9683cb4bdc9252b04f5608a5f2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tqdm.notebook import tqdm\n", "import os.path\n", "import os\n", "\n", "cnt = 0\n", "fCnt = 0\n", "\n", "folderName = 'symmetricConstraint'\n", "shellFileSuffix = 'symmConst_Validator_'\n", "graph_cache_prefix = 'symm_03'\n", "\n", "for row in tqdm(dfItemRequires.iterrows()):\n", "# try:\n", " prop = row[0]\n", " constraint = row[1]\n", " mandatory = []\n", " suggestion = []\n", " normal = []\n", " prop = str(prop)\n", " \n", " if type(constraint['P2316']) == list:\n", " if constraint['P2316'][0] == 'Q21502408':\n", " sfname = 'mandatory'\n", " elif constraint['P2316'][0] == 'Q62026391':\n", " sfname = 'suggestion'\n", " elif constraint['P2316'][0] == 'Normal':\n", " sfname = 'normal'\n", " else:\n", " sfname = 'normal'\n", " \n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv\")):\n", " continue\n", " \n", " command = \"{ time ( kgtk --debug query -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " ../../propertiesSplitWRemoved/claims.\"+ prop +\".copy2.tsv \\\n", " --match 'tsv: (node1)-[nodeProp]->(node2), copy2: (node2)-[]->(node1)' \"\n", " \n", " os.system(\"cp ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv ../../propertiesSplitWRemoved/claims.\"+ prop 
+\".copy2.tsv\")\n", " \n", " if cnt % 60 == 0:\n", " fCnt += 1\n", " fOP = open(\"../../propertiesSplitWRemoved/checkViolations/\" + shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " command\n", " if type(constraint['P2303']) != list: # Exceptions not present\n", " commandRest = \" --return 'nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " else:\n", " excptns = constraint['P2303']\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", 
" \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".correct_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\"+graph_cache_prefix+\"_\" + str(fCnt) + \".sqlite3.db; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\" + prop + \".correct_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct_exceptions.tsv \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + sfname + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " \n", " cnt += 
1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)" ] }, { "cell_type": "code", "execution_count": 15, "id": "polar-canada", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "39" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 105, "id": "virtual-disney", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,2):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved/checkViolations/symmConst_Validator_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "coral-cheese", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 16, "id": "governmental-backup", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a2a80c6bae2542478392fe2d804285f3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "472d2081d6734791bba9336e216f61ec", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/13 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P1639210525[../../allConstraintsAnalysisWRemoved/symmetri...0.011737
P1560348815[../../allConstraintsAnalysisWRemoved/symmetri...0.004282
P336418131[../../allConstraintsAnalysisWRemoved/symmetri...0.000551
P2152800[../../allConstraintsAnalysisWRemoved/symmetri...0.000000
P61852820[../../allConstraintsAnalysisWRemoved/symmetri...0.000000
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P1639 2105 25 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P1560 3488 15 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P3364 1813 1 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P2152 80 0 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P6185 282 0 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "\n", " violation_ratio \n", "P1639 0.011737 \n", "P1560 0.004282 \n", "P3364 0.000551 \n", "P2152 0.000000 \n", "P6185 0.000000 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF1 = pd.DataFrame(symmConstViolations['mandatory']).T\n", "symmConstDF1['violation_ratio'] = symmConstDF1.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF1.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 8, "id": "gross-extraction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P27891052016590[../../allConstraintsAnalysisWRemoved/symmetri...0.058949
P188953333824740[../../allConstraintsAnalysisWRemoved/symmetri...0.044331
P1971808641737[../../allConstraintsAnalysisWRemoved/symmetri...0.009513
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P2789 105201 6590 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P1889 533338 24740 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P197 180864 1737 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "\n", " violation_ratio \n", "P2789 0.058949 \n", "P1889 0.044331 \n", "P197 0.009513 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF2 = pd.DataFrame(symmConstViolations['suggestion']).T\n", "symmConstDF2['violation_ratio'] = symmConstDF2.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 9, "id": "heavy-scout", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P518805[../../allConstraintsAnalysisWRemoved/symmetri...1.000000
P597401[../../allConstraintsAnalysisWRemoved/symmetri...1.000000
P17061284[../../allConstraintsAnalysisWRemoved/symmetri...0.875000
P2652500836[../../allConstraintsAnalysisWRemoved/symmetri...0.625749
P521424146[../../allConstraintsAnalysisWRemoved/symmetri...0.256140
P684120917693437304[../../allConstraintsAnalysisWRemoved/symmetri...0.221346
P30321743316[../../allConstraintsAnalysisWRemoved/symmetri...0.153473
P1382110751657[../../allConstraintsAnalysisWRemoved/symmetri...0.130145
P2293147361969[../../allConstraintsAnalysisWRemoved/symmetri...0.117869
P13277954706[../../allConstraintsAnalysisWRemoved/symmetri...0.081524
P4545464[../../allConstraintsAnalysisWRemoved/symmetri...0.080000
P45111072790[../../allConstraintsAnalysisWRemoved/symmetri...0.066599
P5306730382[../../allConstraintsAnalysisWRemoved/symmetri...0.053712
P34032174112[../../allConstraintsAnalysisWRemoved/symmetri...0.048994
P46026570612622[../../allConstraintsAnalysisWRemoved/symmetri...0.045349
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P5188 0 5 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P5974 0 1 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P1706 12 84 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P2652 500 836 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P521 424 146 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P684 12091769 3437304 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P3032 1743 316 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P1382 11075 1657 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P2293 14736 1969 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P1327 7954 706 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P4545 46 4 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P451 11072 790 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P530 6730 382 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P3403 2174 112 [../../allConstraintsAnalysisWRemoved/symmetri... \n", "P460 265706 12622 [../../allConstraintsAnalysisWRemoved/symmetri... 
\n", "\n", " violation_ratio \n", "P5188 1.000000 \n", "P5974 1.000000 \n", "P1706 0.875000 \n", "P2652 0.625749 \n", "P521 0.256140 \n", "P684 0.221346 \n", "P3032 0.153473 \n", "P1382 0.130145 \n", "P2293 0.117869 \n", "P1327 0.081524 \n", "P4545 0.080000 \n", "P451 0.066599 \n", "P530 0.053712 \n", "P3403 0.048994 \n", "P460 0.045349 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "symmConstDF3 = pd.DataFrame(symmConstViolations['normal']).T\n", "symmConstDF3['violation_ratio'] = symmConstDF3.apply(lambda p: p.incorrect / (p.incorrect + p.correct), axis=1)\n", "symmConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 10, "id": "sexual-blowing", "metadata": {}, "outputs": [], "source": [ "# !head ../../allConstraintsAnalysisWRemoved/symmetricConstraint/normal/claims.P3032.incorrect.tsv\n", "\n" ] }, { "cell_type": "code", "execution_count": 11, "id": "legitimate-aspect", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Symmetric Normal Constraint - Violation Ratios')" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "symmConstDF3.sort_values(by=['violation_ratio'],ascending=False)['violation_ratio'].plot.hist(bins=100).set_title(\"Symmetric Normal Constraint - Violation Ratios\")" ] }, { "cell_type": "markdown", "id": "unlikely-sewing", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 11, "id": "southern-reasoning", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "01675fcd83284c8ab2aa683f43fef458", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/108 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for symmetric constraint checks\")" ] }, { "cell_type": "markdown", "id": "informed-animal", "metadata": {}, "source": [ "## Inverse Constraint (Q21510855)\n", "\n", "This constraint says, if node1 has a property with this constraint, then both `(node1)-[prop]->(node2)` and `(node2)-[prop]->(node1)` must be present with few exceptions" ] }, { "cell_type": "markdown", "id": "dramatic-manchester", "metadata": {}, "source": [ "### Constraints File" ] }, { "cell_type": "code", "execution_count": 4, "id": "leading-server", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-11 11:05:47 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " AND graph_1_c1.\"node2\"=?\r\n", " PARAS: ['P2302', 'Q21510855']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " 
--match \"p: (nodeProp1)-[nodePropEdge:P2302]->(:Q21510855)\" \\\n", " -o ../../constraintsOP/inverseConstraint/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", "execution_count": 5, "id": "offshore-sudan", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id\tnode1\tlabel\tnode2\trank\tnode2;wikidatatype\r\n", "P1026-P2302-Q21510855-adc83b86-0\tP1026\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1029-P2302-Q21510855-6b55e057-0\tP1029\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P115-P2302-Q21510855-f7aa0b78-0\tP115\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1151-P2302-Q21510855-0d9aa9c6-0\tP1151\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1204-P2302-Q21510855-e3d53bb6-0\tP1204\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1283-P2302-Q21510855-0e7699bb-0\tP1283\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1308-P2302-Q21510855-2aba96b7-0\tP1308\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1365-P2302-Q21510855-c809b758-0\tP1365\tP2302\tQ21510855\tnormal\twikibase-item\r\n", "P1366-P2302-Q21510855-eee12ef8-0\tP1366\tP2302\tQ21510855\tnormal\twikibase-item\r\n" ] } ], "source": [ "!head ../../constraintsOP/inverseConstraint/claims.constraints_list.tsv" ] }, { "cell_type": "code", "execution_count": 6, "id": "received-colonial", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "qualiDF = pd.read_csv(\"../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz\",sep='\\t')\n", "constDF = pd.read_csv(\"../../constraintsOP/inverseConstraint/claims.constraints_list.tsv\",sep='\\t')" ] }, { "cell_type": "code", "execution_count": 7, "id": "overall-expense", "metadata": {}, "outputs": [], "source": [ "constDF2 = constDF.set_index('id').join(qualiDF.set_index('node1'),rsuffix='_qualifier').drop(columns=['id', 'node2;wikidatatype_qualifier', 'rank', 'node2', 'label', 
'node2;wikidatatype']).rename(columns={'label_qualifier':'label', 'node2_qualifier': 'node2'})\n", "constDF2 = constDF2.reset_index()\n", "constDF2 = constDF2.rename(columns={'index':'id'})\n", "constDF2['label'] = constDF2.label.fillna(\"P2316\")\n", "constDF2['node2'] = constDF2.node2.fillna(\"Normal\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "valid-throat", "metadata": {}, "outputs": [], "source": [ "constDF2.to_csv(\"../../constraintsOP/inverseConstraint/claims.constraints_all.tsv\",sep=\"\\t\",index=False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "focused-pennsylvania", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "dfItemRequires = pd.read_csv('../../constraintsOP/inverseConstraint/claims.constraints_all.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 10, "id": "moved-rental", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.groupby(['id','node1','label']).node2.apply(lambda p: p.tolist()).reset_index()" ] }, { "cell_type": "code", "execution_count": 11, "id": "attached-rings", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['P2306', 'P2316', 'P4155', 'P2303'], dtype=object)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].unique()" ] }, { "cell_type": "code", "execution_count": 12, "id": "loving-mileage", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "P2306 110\n", "P2316 10\n", "P2303 2\n", "P4155 1\n", "Name: label, dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires['label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 13, "id": "local-forty", "metadata": {}, "outputs": [], "source": [ "#Reference: https://stackoverflow.com/a/17298454\n", "# dfItemRequires.pivot_table('node2', ['node1', 'id'], 'label')\n", "dfItemRequires = 
dfItemRequires.pivot(index=['node1','id'], columns='label', values='node2')" ] }, { "cell_type": "code", "execution_count": 14, "id": "pressed-upset", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2306P2316P4155
node1id
P1026P1026-P2302-Q21510855-adc83b86-0NaN[P50]NaNNaN
P1029P1029-P2302-Q21510855-6b55e057-0NaN[P5096]NaNNaN
P115P115-P2302-Q21510855-f7aa0b78-0NaN[P466]NaNNaN
P1151P1151-P2302-Q21510855-0d9aa9c6-0NaN[P1204][Q21502408]NaN
P1204P1204-P2302-Q21510855-e3d53bb6-0NaN[P1151]NaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2306 P2316 P4155\n", "node1 id \n", "P1026 P1026-P2302-Q21510855-adc83b86-0 NaN [P50] NaN NaN\n", "P1029 P1029-P2302-Q21510855-6b55e057-0 NaN [P5096] NaN NaN\n", "P115 P115-P2302-Q21510855-f7aa0b78-0 NaN [P466] NaN NaN\n", "P1151 P1151-P2302-Q21510855-0d9aa9c6-0 NaN [P1204] [Q21502408] NaN\n", "P1204 P1204-P2302-Q21510855-e3d53bb6-0 NaN [P1151] NaN NaN" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "code", "execution_count": 15, "id": "extra-stomach", "metadata": {}, "outputs": [], "source": [ "dfItemRequires = dfItemRequires.droplevel(1)" ] }, { "cell_type": "code", "execution_count": 16, "id": "seeing-marine", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelP2303P2306P2316P4155
node1
P1026NaN[P50]NaNNaN
P1029NaN[P5096]NaNNaN
P115NaN[P466]NaNNaN
P1151NaN[P1204][Q21502408]NaN
P1204NaN[P1151]NaNNaN
\n", "
" ], "text/plain": [ "label P2303 P2306 P2316 P4155\n", "node1 \n", "P1026 NaN [P50] NaN NaN\n", "P1029 NaN [P5096] NaN NaN\n", "P115 NaN [P466] NaN NaN\n", "P1151 NaN [P1204] [Q21502408] NaN\n", "P1204 NaN [P1151] NaN NaN" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfItemRequires.head()" ] }, { "cell_type": "markdown", "id": "composite-cutting", "metadata": {}, "source": [ "### Query Generator" ] }, { "cell_type": "code", "execution_count": 17, "id": "acoustic-belarus", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b59f5f665aca4df7a79eaa3420c47bc5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Writes shell scripts (a new script every 40 checks) with one kgtk check per property in dfItemRequires.\n", "# Each check pairs claims.<prop>.tsv with claims.<prop2>.tsv, where prop2 is the first P2306 value.\n", "from tqdm.notebook import tqdm\n", "import os.path\n", "import os\n", "\n", "cnt = 0\n", "fCnt = 0\n", "fOP = None\n", "\n", "folderName = 'inverseConstraint_Final'\n", "shellFileSuffix = 'invConst_Validator_new3_'\n", "graph_cache_file_prefix = \"inv_4_\"\n", "\n", "# First P2316 value -> output sub-folder; anything else is filed under 'normal'.\n", "statusFolders = {'Q21502408': 'mandatory', 'Q62026391': 'suggestion'}\n", "\n", "for prop, constraint in tqdm(dfItemRequires.iterrows()):\n", "# try:\n", " \n", "    # Reset the sub-folder on every row so that a property with a missing or\n", "    # unrecognised P2316 value never reuses the folder chosen for the previous\n", "    # property (and never hits an undefined name on a fresh kernel).\n", "    status = constraint['P2316']\n", "    subFolderName = 'normal'\n", "    if isinstance(status, list) and status:\n", "        subFolderName = statusFolders.get(status[0], 'normal')\n", " \n", "    if not(os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv\")):\n", "        continue\n", " \n", "    prop2 = constraint['P2306']\n", "\n", "    if type(prop2) != list:\n", "        continue\n", "    prop2 = prop2[0]\n", "\n", "    if not(os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv\")):\n", "        print(f\"File: ../../propertiesSplitWRemoved/claims.{prop2}.tsv does not exist\")\n", "        continue\n", " \n", "    if cnt % 40 == 0:\n", "        if fOP:\n", "            fOP.close()\n", "        fCnt += 1\n", "        fOP = open(\"../../propertiesSplitWRemoved/checkViolations/\" +
shellFileSuffix + str(fCnt) + \".sh\",\"w\")\n", " \n", " \n", " command = \"{ time ( kgtk --debug query -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " ../../propertiesSplitWRemoved/claims.\"+ prop2 +\".tsv \\\n", " --match '\"+ \\\n", " f\"{prop}: (node1)-[nodeProp]->(node2), {prop2}: (node2)-[]->(node1)' \"\n", "\n", " if type(constraint['P2303']) != list: # Exceptions not present\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt;\\n\"\n", "# print(command)\n", " fOP.write(command)\n", " else:\n", " excptns = set(constraint['P2303'])\n", " commandRest = \" --return 'distinct nodeProp.id, node1 as `node1`, nodeProp.label as `label`, node2 as `node2`' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \\\n", " kgtk --debug ifnotexists -i ../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv \\\n", " --filter-on 
../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \"\n", " \n", " commandOPFile = \"-o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".\"\n", " \n", " command += commandRest + commandOPFile + \"incorrect_wo_exceptions.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug query -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --match '(node1)-[]->()' --where 'node1 in \" + str(list(excptns)).replace(\"'\",'\"') + \"' \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --graph-cache ~/sqlite3_caches/\" + str(graph_cache_file_prefix) + str(fCnt) + \".sqlite3.db; \"\n", "# print(command) \n", " fOP.write(command)\n", " \n", " command = \" kgtk --debug ifnotexists -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_wo_exceptions.tsv \\\n", " --filter-on ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\" + prop + \".incorrect_w_exceptions.tsv \\\n", " --filter-mode NONE \\\n", " --input-keys node1 label \\\n", " --filter-keys node1 label \\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect.tsv; \"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " command = \" kgtk cat -i ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct_wo_exceptions.tsv \\\n", " ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".incorrect_w_exceptions.tsv 
\\\n", " -o ../../allConstraintsAnalysisWRemoved/\" + folderName + \"/\" + subFolderName + \"/claims.\"+ prop +\".correct.tsv ) } 2>> ../../propertiesSplitWRemoved/checkViolations/exec_logs/\" + shellFileSuffix + str(fCnt) + \".txt; \\n\"\n", "# print(command)\n", " fOP.write(command)\n", " \n", " \n", " cnt += 1\n", "# except:\n", "# print(\"Something failed for prop:\",prop)\n", "if fOP:\n", " fOP.close()" ] }, { "cell_type": "code", "execution_count": 18, "id": "large-climb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "110" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": null, "id": "involved-vietnamese", "metadata": {}, "outputs": [], "source": [ "# import os\n", "# for i in range(1,7):\n", "# os.system(\"screen -dm sh ../../propertiesSplitWRemoved/checkViolations/invConst_Validator_new3_\"+str(i)+\".sh\")" ] }, { "cell_type": "markdown", "id": "retired-audio", "metadata": {}, "source": [ "### Analyze Violations" ] }, { "cell_type": "code", "execution_count": 20, "id": "specified-evanescence", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a74a23c16aba41699ee53f4b2fc430ee", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "daa550eb584443cfa2cebf366c663cd1", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/12 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P267383967[../../allConstraintsAnalysisWRemoved/inverseC...0.073951
P41472868[../../allConstraintsAnalysisWRemoved/inverseC...0.027211
P41492864[../../allConstraintsAnalysisWRemoved/inverseC...0.013793
P2033187925[../../allConstraintsAnalysisWRemoved/inverseC...0.013130
P450177922[../../allConstraintsAnalysisWRemoved/inverseC...0.012215
P1151180317[../../allConstraintsAnalysisWRemoved/inverseC...0.009341
\n", "" ], "text/plain": [ " correct incorrect paths \\\n", "P2673 839 67 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P4147 286 8 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P4149 286 4 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P2033 1879 25 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P450 1779 22 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P1151 1803 17 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "\n", " violation_ratio \n", "P2673 0.073951 \n", "P4147 0.027211 \n", "P4149 0.013793 \n", "P2033 0.013130 \n", "P450 0.012215 \n", "P1151 0.009341 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF1 = pd.DataFrame(invConstViolations['mandatory']).T\n", "invConstDF1['violation_ratio'] = invConstDF1.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF1.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 26, "id": "valid-symposium", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P143436775003[../../allConstraintsAnalysisWRemoved/inverseC...0.576382
P155103664753103[../../allConstraintsAnalysisWRemoved/inverseC...0.048730
P156103663640868[../../allConstraintsAnalysisWRemoved/inverseC...0.037928
P62974030240[../../allConstraintsAnalysisWRemoved/inverseC...0.003231
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1434 3677 5003 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P155 1036647 53103 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P156 1036636 40868 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P629 74030 240 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "\n", " violation_ratio \n", "P1434 0.576382 \n", "P155 0.048730 \n", "P156 0.037928 \n", "P629 0.003231 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF2 = pd.DataFrame(invConstViolations['suggestion']).T\n", "invConstDF2['violation_ratio'] = invConstDF2.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF2.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 27, "id": "resident-mustang", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
correctincorrectpathsviolation_ratio
P160513190[../../allConstraintsAnalysisWRemoved/inverseC...0.935961
P34486054575[../../allConstraintsAnalysisWRemoved/inverseC...0.883205
P92515[../../allConstraintsAnalysisWRemoved/inverseC...0.833333
P92615[../../allConstraintsAnalysisWRemoved/inverseC...0.833333
P10294902397[../../allConstraintsAnalysisWRemoved/inverseC...0.830274
P115694824721[../../allConstraintsAnalysisWRemoved/inverseC...0.780606
P51342354[../../allConstraintsAnalysisWRemoved/inverseC...0.701299
P38161427[../../allConstraintsAnalysisWRemoved/inverseC...0.658537
P128314052423[../../allConstraintsAnalysisWRemoved/inverseC...0.632968
P8625915[../../allConstraintsAnalysisWRemoved/inverseC...0.625000
P51328190[../../allConstraintsAnalysisWRemoved/inverseC...0.526316
P42527412938[../../allConstraintsAnalysisWRemoved/inverseC...0.517345
P2512221159[../../allConstraintsAnalysisWRemoved/inverseC...0.418421
P167764[../../allConstraintsAnalysisWRemoved/inverseC...0.400000
P25781111622[../../allConstraintsAnalysisWRemoved/inverseC...0.358915
\n", "
" ], "text/plain": [ " correct incorrect paths \\\n", "P1605 13 190 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P3448 605 4575 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P925 1 5 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P926 1 5 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P1029 490 2397 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P115 6948 24721 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P5134 23 54 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P3816 14 27 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P1283 1405 2423 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P8625 9 15 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P5132 81 90 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P425 2741 2938 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P2512 221 159 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P1677 6 4 [../../allConstraintsAnalysisWRemoved/inverseC... \n", "P2578 1111 622 [../../allConstraintsAnalysisWRemoved/inverseC... 
\n", "\n", " violation_ratio \n", "P1605 0.935961 \n", "P3448 0.883205 \n", "P925 0.833333 \n", "P926 0.833333 \n", "P1029 0.830274 \n", "P115 0.780606 \n", "P5134 0.701299 \n", "P3816 0.658537 \n", "P1283 0.632968 \n", "P8625 0.625000 \n", "P5132 0.526316 \n", "P425 0.517345 \n", "P2512 0.418421 \n", "P1677 0.400000 \n", "P2578 0.358915 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "invConstDF3 = pd.DataFrame(invConstViolations['normal']).T\n", "invConstDF3['violation_ratio'] = invConstDF3.apply(lambda p: p.incorrect / (p.correct + p.incorrect), axis=1)\n", "invConstDF3.sort_values(by=['violation_ratio'],ascending=False).head(15)" ] }, { "cell_type": "code", "execution_count": 28, "id": "dietary-venue", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "head: cannot open ‘../../allConstraintsAnalysisWRemoved/inverseConstraint/normal/claims.P925.incorrect.tsv’ for reading: No such file or directory\r\n" ] } ], "source": [ "!head ../../allConstraintsAnalysisWRemoved/inverseConstraint/normal/claims.P925.incorrect.tsv" ] }, { "cell_type": "code", "execution_count": 29, "id": "entire-gauge", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Symmetric Normal Constraint - Violation Ratios')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "invConstDF3.sort_values(by=['violation_ratio'],ascending=False)['violation_ratio'].plot.hist(bins=100).set_title(\"Symmetric Normal Constraint - Violation Ratios\")" ] }, { "cell_type": "markdown", "id": "working-stable", "metadata": {}, "source": [ "### Find out time required" ] }, { "cell_type": "code", "execution_count": 55, "id": "saved-twelve", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2e8a241c831b4968ae22d06c22c6e85e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/122 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.lineplot(data=pd.Series(times)).set_title(\"Distribution of times (in s) taken for symmetric constraint checks\")" ] }, { "cell_type": "code", "execution_count": null, "id": "numerical-month", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "stuck-criticism", "metadata": {}, "source": [ "# Analysis on properties with constraints" ] }, { "cell_type": "code", "execution_count": 26, "id": "driven-reference", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2021-04-03 09:14:12 query]: SQL Translation:\r\n", "---------------------------------------------\r\n", " SELECT *\r\n", " FROM graph_1 AS graph_1_c1\r\n", " WHERE graph_1_c1.\"label\"=?\r\n", " PARAS: ['P2302']\r\n", "---------------------------------------------\r\n" ] } ], "source": [ "!kgtk --debug query -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz \\\n", " ../../gdrive-kgtk-dump-2020-12-07/qualifiers.properties.tsv.gz \\\n", " --match \"p: (nodeProp1)-[nodePropEdge:P2302]->()\" \\\n", " -o ../../constraintsOP/claims.constraints_list.tsv \\\n", " --graph-cache ~/sqlite3_caches/temp1345.valuetype.sqlite3.db" ] }, { "cell_type": "code", 
"execution_count": 39, "id": "exciting-focus", "metadata": {}, "outputs": [], "source": [ "!kgtk unique -i ../../gdrive-kgtk-dump-2020-12-07/claims.properties.tsv.gz --column node1 -o ../../constraintsOP/claims.propList.tsv" ] }, { "cell_type": "code", "execution_count": 42, "id": "flush-romania", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "node1\tlabel\tnode2\r\n", "P10\tcount\t17\r\n", "P1000\tcount\t10\r\n", "P1001\tcount\t26\r\n", "P1002\tcount\t9\r\n", "P1003\tcount\t20\r\n", "P1004\tcount\t33\r\n", "P1005\tcount\t21\r\n", "P1006\tcount\t26\r\n", "P1007\tcount\t19\r\n" ] } ], "source": [ "!head ../../constraintsOP/claims.propList.tsv" ] }, { "cell_type": "code", "execution_count": 43, "id": "chemical-harris", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "props = pd.read_csv(\"../../constraintsOP/claims.constraints_list.tsv\", sep='\\t')" ] }, { "cell_type": "code", "execution_count": 44, "id": "higher-underground", "metadata": {}, "outputs": [], "source": [ "props2 = props.groupby(['node1']).node2.apply(list)" ] }, { "cell_type": "code", "execution_count": 45, "id": "light-appreciation", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8100" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(props2)" ] }, { "cell_type": "code", "execution_count": 48, "id": "yellow-helmet", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2336, 8100)" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt = 0\n", "totalCnt = 0\n", "for prop in props2.index:\n", " totalCnt += 1\n", " if not(os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv\")):\n", " continue\n", " else:\n", " cnt += 1\n", "cnt, totalCnt" ] }, { "cell_type": "code", "execution_count": 50, "id": "detected-skiing", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "node1\n", "P10 
[Q21502404, Q21510851, Q21510852, Q52004125, Q...\n", "P1000 [Q21510856, Q21510865, Q53869507]\n", "P1001 [Q21502838, Q21503250, Q21510865, Q25796498]\n", "P1002 [Q21503250, Q21510865]\n", "P1003 [Q19474404, Q21502404, Q21502410, Q21510851, Q...\n", " ... \n", "P1563 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1564 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1565 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1566 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "P1567 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "Name: node2, Length: 500, dtype: object" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "props2.head(500)" ] }, { "cell_type": "code", "execution_count": 32, "id": "processed-perfume", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "props2 = pd.read_csv(\"../../constraintsOP/claims.propList.tsv\", sep='\\t')" ] }, { "cell_type": "code", "execution_count": 33, "id": "increasing-graphics", "metadata": {}, "outputs": [], "source": [ "props2 = props2.groupby(['node1']).node2.apply(list)" ] }, { "cell_type": "code", "execution_count": 34, "id": "posted-ukraine", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8193" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(props2)" ] }, { "cell_type": "code", "execution_count": 35, "id": "fifth-provision", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2415, 8193)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Tally every index entry of props2 and, separately, those with a split claims file on disk.\n", "cnt, totalCnt = 0, 0\n", "for prop in props2.index:\n", "    totalCnt += 1\n", "    if os.path.isfile(\"../../propertiesSplitWRemoved/claims.\"+ prop +\".tsv\"):\n", "        cnt += 1\n", "cnt, totalCnt" ] }, { "cell_type": "code", "execution_count": 50, "id": "married-heating", "metadata": { "scrolled": true }, "outputs": [ { "data": {
"text/plain": [ "node1\n", "P10 [Q21502404, Q21510851, Q21510852, Q52004125, Q...\n", "P1000 [Q21510856, Q21510865, Q53869507]\n", "P1001 [Q21502838, Q21503250, Q21510865, Q25796498]\n", "P1002 [Q21503250, Q21510865]\n", "P1003 [Q19474404, Q21502404, Q21502410, Q21510851, Q...\n", " ... \n", "P1563 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1564 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1565 [Q19474404, Q21502404, Q21502410, Q21503247, Q...\n", "P1566 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "P1567 [Q19474404, Q21502404, Q21502410, Q21502838, Q...\n", "Name: node2, Length: 500, dtype: object" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "props2.head(500)" ] }, { "cell_type": "code", "execution_count": null, "id": "magnetic-conditions", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "kgtkEnv", "language": "python", "name": "kgtkenv" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "318px" }, "toc_section_display": true, "toc_window_display": true }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "oldHeight": 122, "position": { "height": "40px", "left": 
"1170px", "right": "20px", "top": "120px", "width": "250px" }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "varInspector_section_display": "none", "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }