{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cordless-better",
   "metadata": {},
   "source": [
    "# Deprecated Statements Analysis\n",
    "\n",
    "Extracts the `P31` and `P279` edges from `../../data/deprecated.tsv` with `kgtk query`, then tabulates the most frequent `node2` values for each property and the most frequent edge labels in the full file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "notebook-imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single import cell so every later cell works after Restart Kernel -> Run All.\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-p31",
   "metadata": {},
   "source": [
    "## `P31` edges\n",
    "\n",
    "Select the edges whose label is `P31` and write them to `../../opAnalysis/deprecated_P31.tsv`.\n",
    "\n",
    "Note: `wc -l` also counts the TSV header row, so the number of edges is one less than the reported line count (3303205 lines vs. 3303204 `P31` rows in the label table at the end of the notebook)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "canadian-broadcast",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2021-04-14 17:58:03 sqlstore]: IMPORT graph directly into table graph_75 from /data/wd-correctness/data/deprecated.tsv ...\n",
      "[2021-04-14 17:58:36 query]: SQL Translation:\n",
      "---------------------------------------------\n",
      " SELECT *\n",
      " FROM graph_75 AS graph_75_c1\n",
      " WHERE (graph_75_c1.\"label\" IN (?))\n",
      " PARAS: ['P31']\n",
      "---------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "!kgtk --debug query -i ../../data/deprecated.tsv \\\n",
    " --match '(node1)-[prop]->(node2)' \\\n",
    " --where 'prop.label in [\"P31\"]' \\\n",
    " -o ../../opAnalysis/deprecated_P31.tsv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "blank-capital",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3303205 ../../opAnalysis/deprecated_P31.tsv\r\n"
     ]
    }
   ],
   "source": [
    "!wc -l ../../opAnalysis/deprecated_P31.tsv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "unique-stevens",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the extracted P31 edges; pandas is imported once in the import cell at the top.\n",
    "dep_P31_df = pd.read_csv(\"../../opAnalysis/deprecated_P31.tsv\", sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "alternate-snowboard",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Q67206691 2546256\n",
       "Q523 352194\n",
       "Q67206785 60055\n",
       "Q1931185 43618\n",
       "Q318 35768\n",
       "Q2247863 21906\n",
       "Q13890 17533\n",
       "Q46587 16574\n",
       "Q6243 13070\n",
       "Q2154519 12184\n",
       "Q1153690 10092\n",
       "Q83373 9998\n",
       "Q72802727 9948\n",
       "Q1491746 9106\n",
       "Q71798532 7641\n",
       "Name: node2, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dep_P31_df['node2'].value_counts().head(15)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-p279",
   "metadata": {},
   "source": [
    "## `P279` edges\n",
    "\n",
    "Same extraction and tabulation as above, for the edges whose label is `P279`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "coupled-rochester",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2021-04-14 18:00:30 query]: SQL Translation:\r\n",
      "---------------------------------------------\r\n",
      " SELECT *\r\n",
      " FROM graph_75 AS graph_75_c1\r\n",
      " WHERE (graph_75_c1.\"label\" IN (?))\r\n",
      " PARAS: ['P279']\r\n",
      "---------------------------------------------\r\n"
     ]
    }
   ],
   "source": [
    "!kgtk --debug query -i ../../data/deprecated.tsv \\\n",
    " --match '(node1)-[prop]->(node2)' \\\n",
    " --where 'prop.label in [\"P279\"]' \\\n",
    " -o ../../opAnalysis/deprecated_P279.tsv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "bibliographic-wayne",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "307 ../../opAnalysis/deprecated_P279.tsv\r\n"
     ]
    }
   ],
   "source": [
    "!wc -l ../../opAnalysis/deprecated_P279.tsv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "caring-gossip",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the extracted P279 edges; pandas is imported once in the import cell at the top.\n",
    "dep_P279_df = pd.read_csv(\"../../opAnalysis/deprecated_P279.tsv\", sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "saving-competition",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Q14659 11\n",
       "Q245932 8\n",
       "Q27825887 7\n",
       "Q21451942 6\n",
       "Q1861967 6\n",
       "Q1457669 4\n",
       "Q58840094 4\n",
       "Q3024240 3\n",
       "Q26772977 3\n",
       "Q387917 3\n",
       "Q192089 3\n",
       "Q276314 3\n",
       "Q152574 2\n",
       "Q209363 2\n",
       "Q7033037 2\n",
       "Name: node2, dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dep_P279_df['node2'].value_counts().head(15)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-labels",
   "metadata": {},
   "source": [
    "## Label distribution in the full file\n",
    "\n",
    "Load all of `../../data/deprecated.tsv` and count how often each edge label occurs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "critical-pendant",
   "metadata": {},
   "outputs": [],
   "source": [
    "# With the default chunked parser this read emitted a DtypeWarning\n",
    "# (\"Columns (7,14) have mixed types\"). low_memory=False parses the file in\n",
    "# one pass so each column gets a single inferred dtype, at the cost of a\n",
    "# higher peak memory footprint while loading.\n",
    "dep_df = pd.read_csv(\"../../data/deprecated.tsv\", sep='\\t', low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "abstract-disclaimer",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "P31 3303204\n",
       "P2215 2236125\n",
       "P2214 2159860\n",
       "P2216 816191\n",
       "P2583 461113\n",
       "P1090 290549\n",
       "P215 273273\n",
       "P6879 107265\n",
       "P7015 66554\n",
       "P881 55717\n",
       "Name: label, dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dep_df.label.value_counts().head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dramatic-spyware",
   "metadata": {},
   "source": [
    "Fin."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "kgtkEnv",
   "language": "python",
   "name": "kgtkenv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "288px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}