{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Metal-binding prediction demo\n",
    "\n",
    "Run the cells from top to bottom:\n",
    "\n",
    "1. Load the dataset, the metal dictionary and the metal predictor model.\n",
    "2. Pick a random sample from the dataset.\n",
    "3. Predict the metal and the binding sites for that sample and print them next to the ground truth."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hide_input": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "import json\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "print (\"Importing modules...\")\n",
    "import modules\n",
    "from keras.models import model_from_json\n",
    "\n",
    "##################################################\n",
    "\n",
    "print (\"Reading data from disk...\", end=' ')\n",
    "sys.stdout.flush()\n",
    "\n",
    "df = pd.read_parquet('./datasets/Metal_all_20180601.parquet')\n",
    "seqs = np.array(df.sequence)\n",
    "metals = np.array(df.ligandId)\n",
    "fingerprints = np.array(df.fingerprint)\n",
    "\n",
    "print (\"Done\")\n",
    "\n",
    "##################################################\n",
    "\n",
    "print (\"Using FOFE encoder...\", end=' ')\n",
    "sys.stdout.flush()\n",
    "with open(\"./dictionaries/metal_dict\", 'r') as fp:\n",
    "    metal_dict = json.load(fp)\n",
    "\n",
    "# Inverse lookup (dictionary value -> dictionary key); it is indexed below\n",
    "# with the argmax of the metal predictor's output.\n",
    "num_to_metal = {e: k for k, e in metal_dict.items()}\n",
    "print (\"Done\")\n",
    "\n",
    "##################################################\n",
    "def load_saved_model(name):\n",
    "    \"\"\"Rebuild a Keras model from ./models/<name>.json and ./models/<name>.h5.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    name : str\n",
    "        File stem shared by the architecture (.json) and weights (.h5) files.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The model returned by `model_from_json`, with the weights loaded.\n",
    "    \"\"\"\n",
    "    # `with` guarantees the file handle is closed even if reading fails.\n",
    "    with open('./models/' + name + '.json', 'r') as json_file:\n",
    "        model = model_from_json(json_file.read())\n",
    "    model.load_weights('./models/' + name + '.h5')\n",
    "    return model\n",
    "\n",
    "print(\"Loading metal_predictor...\", end=' ')\n",
    "metal_predictor = load_saved_model('metal_predict')\n",
    "print (\"Done\")\n",
    "\n",
    "##################################################\n",
    "factor = 2.33\n",
    "def threshold_func(y_in, factor):\n",
    "    \"\"\"Binarise every entry of `y_in` along its first axis.\n",
    "\n",
    "    For each `y_in[i]` the threshold is `mean(y_in[i]) + factor * std(y_in[i])`;\n",
    "    values strictly above it become 1, all others 0.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    y_in : numpy.ndarray\n",
    "        Scores to threshold.\n",
    "    factor : float\n",
    "        Number of standard deviations above the mean used as the cut-off.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        Array with the shape and dtype of `y_in` holding the 0/1 mask.\n",
    "    \"\"\"\n",
    "    y_out = np.zeros_like(y_in)\n",
    "    for i in range(y_in.shape[0]):\n",
    "        th = np.mean(y_in[i]) + factor * np.std(y_in[i])\n",
    "        y_out[i] = (y_in[i] > th)\n",
    "    return y_out\n",
    "\n",
    "print (\"Threshold factor set to \" + str(factor))\n",
    "print (\"--------------------------------------------------\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Bound the draw by the number of loaded samples rather than a hard-coded\n",
    "# count, so the index is valid for whatever the parquet file contains.\n",
    "choice = np.random.randint(len(seqs))\n",
    "print (\"Choose index [\" + str(choice) + \"]\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hide_input": false
   },
   "outputs": [],
   "source": [
    "print (\"--------------------------------------------------\")\n",
    "seq = seqs[choice]\n",
    "if len(seq) > 60:\n",
    "    # Long sequences are abbreviated to their first and last 30 characters.\n",
    "    print (\"The seuqnce is [\" + seq[:30] + \"...\" + seq[-30:] + \"]\\n\")\n",
    "else:\n",
    "    print (\"The seuqnce is [\" + seq + \"]\\n\")\n",
    "\n",
    "# Encode once and reuse: both predictors are fed the same encoding.\n",
    "# NOTE(review): assumes modules.FOFE is deterministic - confirm.\n",
    "encoded = modules.FOFE(seq)\n",
    "metal_out = metal_predictor.predict(encoded)\n",
    "\n",
    "max_index = np.argmax(metal_out)\n",
    "metal = num_to_metal[max_index]\n",
    "\n",
    "print (\"This sample is binded to [\" + metal + \"]\")\n",
    "print (\" Ground truth [\" + metals[choice] + \"]\\n\")\n",
    "\n",
    "# A separate model per metal is stored under ./models/<metal>.json/.h5.\n",
    "MBS_predictor = load_saved_model(metal)\n",
    "\n",
    "MBS_out = MBS_predictor.predict(encoded)\n",
    "MBS_OneHot = threshold_func(MBS_out, factor)\n",
    "# Only the first row of the thresholded output is reported.\n",
    "MBS = np.where(MBS_OneHot[0] == 1)[0]\n",
    "print (\"This sample has [\", end=\"\")\n",
    "print (*MBS, sep=\",\", end=\"\")\n",
    "print (\"] binding sites\")\n",
    "print (\" Ground truth [\", end=\"\")\n",
    "print (*fingerprints[choice], sep=\",\", end=\"\")\n",
    "print (\"]\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}