{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Explore HDX/MS data for LacI\n", "![LacI peptides and structure.](lacI_files/lacI_diagram_table.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### First choose the state(s)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a98456fa066f4cc785958102c50d7e43", "version_major": 2, "version_minor": 0 }, "text/plain": [] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Choose from the following functional states: \n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b6639e4ba6174d10b74c711853c1962f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Checkbox(value=True, description='IPTG, an inducer molecule')" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cff328de5e234a7bad0d871da91213a7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Checkbox(value=True, description='ONPF, an anti-inducer molecule')" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4ffc7cf9f1d041119a0a733df906ca36", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Checkbox(value=False, description='APO protein')" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ffe09a4412144a8da2c5ae56eadd50ba", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Checkbox(value=True, description='operator DNA')" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c2e43336cb9648da96f84e077fc10bb8", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Checkbox(value=False, description='TMG, an inducer molecule')" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0a3782ecaadb4d31ae7317662b3c9de2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Checkbox(value=False, description='ONPF and operator DNA')" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import math\n", "import matplotlib.pyplot as plt\n", "import pytraj as pt\n", "import nglview as nv\n", "import seaborn as sns\n", "import ipywidgets as widgets\n", "from scipy.optimize import curve_fit\n", "from matplotlib.ticker import FormatStrFormatter\n", "\n", "\n", "print('Choose from the following functional states: ')\n", "CBwidget_IPTG = widgets.Checkbox(description = 'IPTG, an inducer molecule', value=True, disabled=False)\n", "CBwidget_ONPF = widgets.Checkbox(description = 'ONPF, an anti-inducer molecule', value=True, disabled=False)\n", "CBwidget_APO = widgets.Checkbox(description = 'APO protein', value=False, disabled=False)\n", "CBwidget_DNA = widgets.Checkbox(description = 'operator DNA', value=True, disabled=False)\n", "CBwidget_TMG = widgets.Checkbox(description = 'TMG, an inducer molecule', value=False, disabled=False)\n", "CBwidget_ONPFDNA = widgets.Checkbox(description = 'ONPF and operator DNA', value=False, disabled=False)\n", "\n", "display(CBwidget_IPTG, CBwidget_ONPF, CBwidget_APO, CBwidget_DNA, CBwidget_TMG, CBwidget_ONPFDNA)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Run to confirm your choices." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Chosen states:\n", "IPTG ONPF DNA\n" ] } ], "source": [ "states_list = []\n", "\n", "if CBwidget_IPTG.value == True:\n", " states_list.append('IPTG')\n", "if CBwidget_ONPF.value == True:\n", " states_list.append('ONPF')\n", "if CBwidget_APO.value == True:\n", " states_list.append('APO')\n", "if CBwidget_DNA.value == True:\n", " states_list.append('DNA')\n", "if CBwidget_TMG.value == True:\n", " states_list.append('TMG')\n", "if CBwidget_ONPFDNA.value == True:\n", " states_list.append('ONPFDNA')\n", "\n", "print('Chosen states:'); print(*states_list)\n", "\n", "res_peptides_file = pd.ExcelFile('lacI_files/peptide_list.xlsx')\n", "\n", "states_dict = {}\n", "peptide_states_list = ['peptide']\n", "for state in states_list:\n", " states_dict[state] = pd.read_excel(res_peptides_file)\n", " peptide_states_list.append(state)\n", " \n", "single_state_dict = states_dict.get(states_list[0])\n", "all_peptides = single_state_dict[['peptide']].values.tolist()\n", "all_peptides_2 = []\n", "first_res = single_state_dict[['first_res']].values.tolist()\n", "index = 0\n", "\n", "while index < len(all_peptides):\n", " current_first_res = str(int(first_res[index][0]))\n", " all_peptides_2.append(current_first_res + ' - ' + all_peptides[index][0])\n", " index = index + 1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Compile exchange information for each peptide for chosen states." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "done!\n" ] } ], "source": [ "############\n", "\n", "def getIndexes(dfObj, value):\n", " listOfPos = []\n", " result = dfObj.isin([value])\n", " seriesObj = result.any()\n", " columnNames = list(seriesObj[seriesObj == True].index)\n", " for col in columnNames:\n", " rows = list(result[col][result[col] == True].index)\n", " for row in rows:\n", " listOfPos.append((row, col))\n", " return listOfPos\n", "\n", "############\n", "\n", "# create dictionary keys from peptides\n", "# in subsequent code: add state and each TP avg exchange measurements as values for peptide keys\n", "peptide_exchange_dict = {}\n", " \n", "stdev_dict_IPTG = {}\n", "stdev_dict_ONPF = {}\n", "stdev_dict_APO = {}\n", "stdev_dict_DNA = {}\n", "stdev_dict_ONPFDNA = {}\n", "stdev_dict_TMG = {}\n", "stdev_dict_dict = {'IPTG':stdev_dict_IPTG,\n", " 'ONPF':stdev_dict_ONPF,\n", " 'APO':stdev_dict_APO,\n", " 'DNA':stdev_dict_DNA,\n", " 'ONPFDNA':stdev_dict_ONPFDNA,\n", " 'TMG':stdev_dict_TMG}\n", "\n", "\n", "for peptide in all_peptides:\n", " peptide_exchange_dict[peptide[0]] = []\n", " \n", "for state in states_dict:\n", " i = 0 # peptide counter\n", " for peptide in states_dict[state]['peptide']:\n", " peptide_first_res = states_dict[state]['first_res'][i]\n", " peptide_last_res = states_dict[state]['last_res'][i]\n", "\n", " peptide_file = pd.ExcelFile('lacI_files/compiled_data/' + str(peptide_first_res)\n", " + '_' + str(peptide_last_res) + '_' + peptide + '_fitting.xlsx')\n", " raw_df = pd.read_excel(peptide_file, 'Peptide SD', nrows = 15)\n", " \n", " column_vals = []\n", " listOfPositions = getIndexes(raw_df, state)\n", " for j in range(len(listOfPositions)):\n", " temp_string = (listOfPositions[j][1])\n", " position = int(temp_string[-2:])\n", " column_vals.append(position) # columns in spreadsheet corresponding to data label\n", "\n", " peptide_exchange = {}\n", " for element in column_vals:\n", " if state.lower() == (raw_df.iloc[2][element]).lower():\n", " peptide_exchange[state] = list(raw_df.iloc[4:13,element+2])\n", " \n", " peptide_exchange_dict[peptide].append(peptide_exchange)\n", "\n", "\n", " stdev_df = pd.read_excel(peptide_file, 'Peptide SD', usecols = \"H,P,Y,AH,AP,AX\", \n", " skiprows = 26, nrows = 35, header = None)\n", " stdev_df = stdev_df[0:9]\n", "\n", " stdev_dict_IPTG[peptide] = list(stdev_df.iloc[:, 0])\n", " stdev_dict_ONPF[peptide] = list(stdev_df.iloc[:, 1]) \n", " stdev_dict_APO[peptide] = list(stdev_df.iloc[:, 2])\n", " stdev_dict_DNA[peptide] = list(stdev_df.iloc[:, 3])\n", " stdev_dict_ONPFDNA[peptide] = list(stdev_df.iloc[:, 4])\n", " stdev_dict_TMG[peptide] = list(stdev_df.iloc[:, 5])\n", "\n", " i = i + 1\n", " \n", "print('done!')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Choose two peptides to visualize." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d9d04bbd4f884fc48a56806bafeb504a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Dropdown(description='Peptide 1:', index=3, options=('63 - LIGVA', '72 - ALHAP', '72 - ALHAPSQIVA', '72 - ALHA…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "543432b043da4f9293a08a65a0611009", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Dropdown(description='Peptide 2:', index=12, options=('63 - LIGVA', '72 - ALHAP', '72 - ALHAPSQIVA', '72 - ALH…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "single_state_dict = states_dict.get(states_list[0])\n", "all_peptides = single_state_dict[['peptide']].values.tolist()\n", "all_peptides_2 = []\n", "first_res = single_state_dict[['first_res']].values.tolist()\n", "index = 0\n", "\n", "while index < len(all_peptides):\n", " current_first_res = str(int(first_res[index][0]))\n", " all_peptides_2.append(current_first_res + ' - ' + all_peptides[index][0])\n", " index = index + 1\n", "\n", "DDwidget = widgets.Dropdown(\n", " options = all_peptides_2,\n", " value = '72 - ALHAPSQIVAA',\n", " description = 'Peptide 1:',\n", " disabled = False,\n", ")\n", "\n", "DDwidget2 = widgets.Dropdown(\n", " options = all_peptides_2,\n", " value = '109 - AAVHNL',\n", " description = 'Peptide 2:',\n", " disabled = False,\n", ")\n", "\n", "display(DDwidget)\n", "display(DDwidget2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Run to confirm your choices and fit regression functions to the raw data." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Peptide 1: ALHAPSQIVAA\n", "Peptide 2: AAVHNL\n" ] } ], "source": [ "peptide_input1 = str(DDwidget.value)\n", "peptide_input1 = peptide_input1.split()[-1]\n", "peptide1_firstres = str(DDwidget.value).split()[0]\n", "\n", "peptide_input2 = str(DDwidget2.value)\n", "peptide_input2 = peptide_input2.split()[-1]\n", "peptide2_firstres = str(DDwidget2.value).split()[0]\n", "\n", "selected_peptides = [peptide_input1, peptide_input2]\n", "print('Peptide 1: ' + selected_peptides[0])\n", "print('Peptide 2: ' + selected_peptides[1])\n", "\n", "peptide1_list = peptide_exchange_dict.get(peptide_input1)\n", "peptide2_list = peptide_exchange_dict.get(peptide_input2)\n", "\n", "# fitting\n", "\n", "def exchange_fit(x, a, b, c, d, e, f, g):\n", " max_protons == a + b + c + g - 2 - num_prolines\n", " d > e\n", " e > f\n", " d > f\n", " max_protons == a + b + c + g - 2 - num_prolines\n", "\n", " return max_protons - a * np.exp(-d * x) - b * np.exp(-e * x) - c * np.exp(-f * x) - g\n", "\n", "def exchange_fit_low(x, b, c, e, f, g):\n", " max_protons == b + c + g - 2 - num_prolines\n", " e > f\n", " max_protons == b + c + g - 2 - num_prolines\n", " return max_protons - b * np.exp(-e * x) - c * np.exp(-f * x) - g\n", "\n", "timepoints = [0, 30, 45, 60, 300, 1500, 3600, 7200, 14400]\n", "trialT = np.logspace(1.5, 4.5, 10000)\n", "\n", "peptide1_fit_dict = {}\n", "peptide2_fit_dict = {}\n", "\n", "for element in peptide1_list:\n", " for key, value in element.items():\n", "\n", " peptide1_fit_dict[key] = []\n", "\n", " num_prolines = peptide_input1.count('P')\n", " max_protons = len(peptide_input1) - 2 - num_prolines\n", "\n", " p1_index = 0\n", " peptide1_tps = []\n", " peptide1_ex = []\n", " for tp in value:\n", " if not math.isnan(float(tp)):\n", " peptide1_tps.append(timepoints[p1_index])\n", " peptide1_ex.append(float(tp))\n", " p1_index = p1_index + 1\n", "\n", " if peptide1_ex[-1] > .5:\n", "\n", " popt, pcov = curve_fit(f = exchange_fit, xdata = peptide1_tps, ydata = peptide1_ex,\n", " bounds = (0, [max_protons, max_protons, max_protons, 1, .1, .01, max_protons]),\n", " maxfev = 100000)\n", " exchange_peptide1 = exchange_fit(trialT, *popt)\n", "\n", " else:\n", "\n", " popt, pcov = curve_fit(f = exchange_fit_low, xdata = peptide1_tps, ydata = peptide1_ex,\n", " bounds = (0, [max_protons, max_protons, .1, .01, max_protons]),\n", " maxfev = 100000)\n", " exchange_peptide1 = exchange_fit_low(trialT, *popt)\n", "\n", " peptide1_fit_dict[key] = exchange_peptide1\n", " \n", "for element in peptide2_list:\n", " for key, value in element.items():\n", "\n", " peptide2_fit_dict[key] = []\n", "\n", " num_prolines = peptide_input2.count('P')\n", " max_protons = len(peptide_input2) - 2 - num_prolines\n", "\n", " p2_index = 0\n", " peptide2_tps = []\n", " peptide2_ex = []\n", " for tp in value:\n", " if not math.isnan(float(tp)):\n", " peptide2_tps.append(timepoints[p2_index])\n", " peptide2_ex.append(float(tp))\n", " p2_index = p2_index + 1\n", "\n", " if peptide2_ex[-1] > .5:\n", "\n", " popt, pcov = curve_fit(f = exchange_fit, xdata = peptide2_tps, ydata = peptide2_ex,\n", " bounds = (0, [max_protons, max_protons, max_protons, 1, .1, .01, max_protons]),\n", " maxfev = 100000)\n", " exchange_peptide2 = exchange_fit(trialT, *popt)\n", "\n", " else:\n", "\n", " popt, pcov = curve_fit(f = exchange_fit_low, xdata = peptide2_tps, ydata = peptide2_ex,\n", " bounds = (0, [max_protons, max_protons, .1, .01, max_protons]),\n", " maxfev = 100000)\n", " exchange_peptide2 = exchange_fit_low(trialT, *popt)\n", "\n", " peptide2_fit_dict[key] = exchange_peptide2\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plot H/D exchange data and functions for each peptide for each state." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "PDB ID: 2P9H (wild-type LacI core domain + IPTG). \n", "Oriented with the C-terminal subdomain at the top.\n", "\n", "Use the mouse to move the structure. \n", "Double-click for full-screen. Type \"R\" to zoom out, \"I\" to rotate, and \"K\" to rock.\n", "NGLViewer, an amazing tool: H Nguyen, DA Case and AS Rose, Bioinformatics, 2017. doi:10.1093/bioinformatics/btx789\n", "\n", "Peptide ALHAPSQIVAA, residues 74-82, shown as orange surface and sticks.\n", "Peptide AAVHNL, residues 111-114, shown as blue surface and sticks.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b5dd9080582b4f7aa54ea35ed05ccdf9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "NGLWidget()" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "font = {'family' : 'Arial',\n", " 'weight' : 'normal',\n", " 'size' : 20\n", " }\n", "axes = {'titlesize' : 20,\n", " 'titleweight' : 'bold',\n", " 'labelsize' : 20\n", " }\n", "\n", "plt.rc('font', **font)\n", "plt.rc('axes', **axes)\n", "plt.rc('lines', lw = 2)\n", "color_dict = {\n", " 'IPTG' : 'blue',\n", " 'ONPF' : 'orange',\n", " 'APO' : 'green',\n", " 'DNA' : 'red',\n", " 'TMG' : 'gray',\n", " 'ONPFDNA' : 'purple'\n", " }\n", "\n", "figure, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,6))\n", "\n", "for ex_data in peptide_exchange_dict.get(peptide_input1):\n", " for state in ex_data:\n", "\n", " for key in stdev_dict_dict:\n", " if state == key:\n", " current_stdev_dict = stdev_dict_dict.get(state)\n", " current_stdev = current_stdev_dict.get(peptide_input1)\n", "\n", " ax1.plot(timepoints, ex_data.get(state), 'o', label = state, markersize = 10, alpha = 0.5,\n", " color = color_dict.get(state))\n", " ax1.errorbar(timepoints, ex_data.get(state), yerr = current_stdev, linestyle = 'None',\n", " ecolor = color_dict.get(state), capsize = 3, linewidth = 1)\n", " ax1.plot(trialT, peptide1_fit_dict.get(state), '-', color = color_dict.get(state))\n", " y_lim = len(peptide_input1) - 2 - peptide_input1[2:].count('P') + 0.25\n", " \n", " ax1.set_ylabel('# Deuterons')\n", " ax1.set_xlabel('Time (seconds)')\n", " ax1.set_title(peptide1_firstres + ' - ' + peptide_input1)\n", " ax1.set_xscale('log')\n", " ax1.set_ylim(0, y_lim)\n", " ax1.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))\n", "\n", " \n", "for ex_data in peptide_exchange_dict.get(peptide_input2):\n", " for state in ex_data:\n", "\n", " for key in stdev_dict_dict:\n", " if state == key:\n", " current_stdev_dict = stdev_dict_dict.get(state)\n", " current_stdev = current_stdev_dict.get(peptide)\n", "\n", " ax2.plot(timepoints, ex_data.get(state), 'o', label = state, markersize = 10, alpha = 0.5,\n", " color = color_dict.get(state))\n", " ax2.errorbar(timepoints, ex_data.get(state), yerr = current_stdev, linestyle = 'None',\n", " ecolor = color_dict.get(state), capsize = 3, linewidth = 1)\n", " ax2.plot(trialT, peptide2_fit_dict.get(state), '-', color = color_dict.get(state))\n", " y_lim = len(peptide_input2) - 2 - peptide_input2[2:].count('P') + 0.25\n", "\n", " ax2.set_xlabel('Time (seconds)')\n", " ax2.set_title(peptide2_firstres + ' - ' + peptide_input2)\n", " ax2.set_xscale('log')\n", " ax2.set_ylim(0, y_lim)\n", " ax2.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))\n", "\n", " plt.legend(frameon = False, bbox_to_anchor=(1, 1))\n", " \n", "# This section controls the structure widget.\n", "\n", "peptide1_lastres = int(peptide1_firstres) + len(peptide_input1) - 1\n", "peptide2_lastres = int(peptide2_firstres) + len(peptide_input2) - 1\n", "\n", "# view = nv.NGLWidget()\n", "traj = pt.load('lacI_files/2p9h_REDO.pdb')\n", "view = nv.show_pytraj(traj)\n", "view.add_surface(str(int(peptide1_firstres)+2) + \"-\" + str(peptide1_lastres), color = 'orange', opacity='0.5', wireframe=True)\n", "view.add_ball_and_stick(str(int(peptide1_firstres)+2) + \"-\" + str(peptide1_lastres))\n", "view.add_surface(str(int(peptide2_firstres)+2) + \"-\" + str(peptide2_lastres), color = 'lightblue', opacity='0.5', wireframe=True)\n", "view.add_ball_and_stick(str(int(peptide2_firstres)+2) + \"-\" + str(peptide2_lastres))\n", "view.background = \"white\"\n", "# view.add_surface('74-81', color = 'orange', opacity='0.9', wireframe=True)\n", "# view.add_ball_and_stick('246-255')\n", "\n", "\n", "# view.add_representation('line', selection='water') # uncomment this line to see solvent\n", "view._set_size('800px', '600px')\n", "mat = [59.476009917035874 ,66.10295214971443, -76.02228809508843, 0,\n", " -55.56959630110223, 95.16365469618486, 39.27191257844691, 0,\n", " 84.029807431962, 16.14505706800799, 79.77915091670029, 0, \n", " -22.46560287475586, 17.614827632904053, -10.28352165222168, 1]\n", "view._set_camera_orientation(mat)\n", "print('''\n", "PDB ID: 2P9H (wild-type LacI core domain + IPTG). \n", "Oriented with the C-terminal subdomain at the top.\n", "\n", "Use the mouse to move the structure. \n", "Double-click for full-screen. Type \"R\" to zoom out, \"I\" to rotate, and \"K\" to rock.\n", "NGLViewer, an amazing tool: H Nguyen, DA Case and AS Rose, Bioinformatics, 2017. doi:10.1093/bioinformatics/btx789\n", "''')\n", "print('Peptide ' + peptide_input1 + ', residues ' + str(int(peptide1_firstres)+2) + \"-\" + str(peptide1_lastres) + \n", " ', shown as orange surface and sticks.')\n", "print('Peptide ' + peptide_input2 + ', residues ' + str(int(peptide2_firstres)+2) + \"-\" + str(peptide2_lastres) + \n", " ', shown as blue surface and sticks.')\n", "view" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Compile mutational phenotype information for peptide 1." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "-------- Mutational phenotype data from Markiewicz et al., J. Mol Biol., 1994 --------\n", "\n", "Peptide 1: ALHAPSQIVAA\n", "\n", "A72: group 12\n", "N-terminal part of the dimerization interface - substitutions result in Is phenotype\n", "\n", "Mutations that do not affect the phenotype: H C E G K Q R S\n", "Mutations causing small effects on phenotype: Y L P\n", "Mutations causing dramatic effects on phenotype: F\n", "\n", "L73: group 11\n", "IPTG contacts, substitutions result in Is phenotype\n", "\n", "Mutations that do not affect the phenotype: Y H C F\n", "Mutations causing small effects on phenotype: P\n", "Mutations causing dramatic effects on phenotype: A E G K Q R S\n", "\n", "H74: group 12\n", "N-terminal part of the dimerization interface - substitutions result in Is phenotype\n", "\n", "Mutations that do not affect the phenotype: L A C E G K Q S\n", "Mutations causing small effects on phenotype: R\n", "Mutations causing dramatic effects on phenotype: Y F P\n", "\n", "A75: group 11\n", "IPTG contacts, substitutions result in Is phenotype\n", "\n", "Mutations that do not affect the phenotype: \n", "Mutations causing small effects on phenotype: \n", "Mutations causing dramatic effects on phenotype: Y L H C E F G K P Q R S\n", "\n", "P76: group 11\n", "IPTG contacts, substitutions result in Is phenotype\n", "\n", "Mutations that do not affect the phenotype: A C\n", "Mutations causing small effects on phenotype: L H E F G Q R S\n", "Mutations causing dramatic effects on phenotype: Y K\n", "\n", "S77: group 14\n", "N-terminal domain of the core, near the center of the domain (weak Is)\n", "\n", "Mutations that do not affect the phenotype: A G\n", "Mutations causing small effects on phenotype: L H C E F P Q R\n", "Mutations causing dramatic effects on phenotype: Y K\n", "\n", "Q78: group 12\n", "N-terminal part of the dimerization interface - substitutions result in Is phenotype\n", "\n", "Mutations that do not affect the phenotype: F S\n", "Mutations causing small effects on phenotype: Y L H A E G\n", "Mutations causing dramatic effects on phenotype: C K P R\n", "\n", "I79: group 11\n", "IPTG contacts, substitutions result in Is phenotype\n", "\n", "Mutations that do not affect the phenotype: A C S\n", "Mutations causing small effects on phenotype: Y L E F G K P Q R\n", "Mutations causing dramatic effects on phenotype: H\n", "\n", "V80: group 12\n", "N-terminal part of the dimerization interface - substitutions result in Is phenotype\n", "\n", "Mutations that do not affect the phenotype: A C G Q S\n", "Mutations causing small effects on phenotype: L K P R\n", "Mutations causing dramatic effects on phenotype: Y H E F\n", "\n", "A81: group 12\n", "N-terminal part of the dimerization interface - substitutions result in Is phenotype\n", "\n", "Mutations that do not affect the phenotype: H F P S\n", "Mutations causing small effects on phenotype: Y L C G K Q R\n", "Mutations causing dramatic effects on phenotype: E\n", "\n", "A82: group 1\n", "Solvent exposed and mostly tolerant to substitutions\n", "\n", "Mutations that do not affect the phenotype: Y H F R\n", "Mutations causing small effects on phenotype: L C E G K P S\n", "Mutations causing dramatic effects on phenotype: Q\n", "\n", "Heatmap\n", "Darker colors indicate increased disruption to phenotype by mutation.\n", "White boxes - WT residue.\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Make dictionaries to organize mutation information\n", "MH_dict = {}\n", "with open(\"lacI_files/MH_groups.txt\") as file:\n", " for line in file:\n", " (key, val) = line.split()\n", " MH_dict[int(key)] = val\n", "\n", "mutation_dict = {}\n", "with open(\"lacI_files/single_mutants.txt\") as file:\n", " for line in file:\n", " (key, val) = line.split()\n", " mutation_dict[key] = val\n", " \n", "MHdescriptions_dict = {}\n", "with open(\"lacI_files/MHgroups_descriptions.txt\") as file:\n", " for line in file:\n", " (key, val) = line.split(':')\n", " MHdescriptions_dict[key] = val\n", "\n", "# This section controls the mutation information.\n", "\n", "peptide1_range = range(int(peptide1_firstres), int(peptide1_lastres) + 1, 1)\n", "\n", "print('''\n", "-------- Mutational phenotype data from Markiewicz et al., J. Mol Biol., 1994 --------\n", "''')\n", "print('Peptide 1: ' + peptide_input1)\n", "\n", "peptide_index = 0\n", "res_label_value = []\n", "AA_value = []\n", "heatmap_value = []\n", "\n", "aa = list(\"YLHACDEFGIKMNPQRSTVW\")\n", "\n", "for residue in peptide1_range:\n", " no_effect = []\n", " small_effect = []\n", " dramatic_effect = []\n", " \n", " # print MH group\n", " for key, value in MH_dict.items(): \n", " if residue == key:\n", " peptide_index = peptide_index + 1\n", " res_label = str(peptide_input1[peptide_index-1]) + str(residue)\n", " print('\\n' + res_label + ': group ' + value)\n", " # print MH group description\n", " for second_key, second_value in MHdescriptions_dict.items():\n", " if value == second_key:\n", " print(second_value)\n", " \n", " # group and print phenotype effects of specific point mutations\n", " for AA in aa:\n", " for key, value in mutation_dict.items():\n", " if key[-1] == AA:\n", " if res_label in key:\n", " if value == '+':\n", " no_effect.append(key[-1])\n", " heatmap_value.append(1)\n", " AA_value.append(AA)\n", " res_label_value.append(residue)\n", " elif value == ('+-'):\n", " small_effect.append(key[-1])\n", " heatmap_value.append(0.25)\n", " AA_value.append(AA)\n", " res_label_value.append(residue)\n", " elif value == ('-+'):\n", " small_effect.append(key[-1])\n", " heatmap_value.append(0.75)\n", " AA_value.append(AA)\n", " res_label_value.append(residue)\n", " elif value == '-':\n", " dramatic_effect.append(key[-1])\n", " heatmap_value.append(0)\n", " AA_value.append(AA)\n", " res_label_value.append(residue)\n", "\n", " print('Mutations that do not affect the phenotype: ' + \" \".join(x for x in no_effect))\n", " print('Mutations causing small effects on phenotype: ' + \" \".join(x for x in small_effect))\n", " print('Mutations causing dramatic effects on phenotype: ' + \" \".join(x for x in dramatic_effect))\n", "\n", "heatmap_df = pd.DataFrame({'Phenotype': AA_value, 'Residue': res_label_value, 'Sensitivity': heatmap_value })\n", "# plot it\n", "print('\\nHeatmap'\n", " '\\nDarker colors indicate increased disruption to phenotype by mutation.'\n", " '\\nWhite boxes - WT residue.')\n", "plt.rcParams['figure.figsize'] = (10.0, 8.0)\n", "plt.rcParams['font.size'] = 18\n", "plt.rcParams['font.family'] = 'Arial'\n", "df_wide=heatmap_df.pivot_table( index='Residue', columns='Phenotype', values='Sensitivity' )\n", "p2=sns.heatmap( df_wide, cmap=\"YlGn_r\" )\n", "p2.set_title(peptide1_firstres + ' - ' + peptide_input1)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Compile mutational phenotype information for peptide 2." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "-------- Mutational phenotype data from Markiewicz et al., J. Mol Biol., 1994 --------\n", "\n", "Peptide 2: AAVHNL\n", "\n", "A109: group 1\n", "Solvent exposed and mostly tolerant to substitutions\n", "\n", "Mutations that do not affect the phenotype: Y L H C E F G K P Q R S\n", "Mutations causing small effects on phenotype: \n", "Mutations causing dramatic effects on phenotype: \n", "\n", "A110: group 12\n", "N-terminal part of the dimerization interface - substitutions result in Is phenotype\n", "\n", "Mutations that do not affect the phenotype: L C F G Q S\n", "Mutations causing small effects on phenotype: H E P R\n", "Mutations causing dramatic effects on phenotype: Y K\n", "\n", "V111: group 3\n", "Buried, but tolerant to substitutions\n", "\n", "Mutations that do not affect the phenotype: Y L H A C E F G K P Q R S\n", "Mutations causing small effects on phenotype: \n", "Mutations causing dramatic effects on phenotype: \n", "\n", "H112: group 15\n", "Interface between headpiece of protomer A and core of protomer B, or interface between headpieces A and B - intolerant to substitutions\n", "Mutations that do not affect the phenotype: Y L A C E F G K P Q R S\n", "Mutations causing small effects on phenotype: \n", "Mutations causing dramatic effects on phenotype: \n", "\n", "N113: group 15\n", "Interface between headpiece of protomer A and core of protomer B, or interface between headpieces A and B - intolerant to substitutions\n", "Mutations that do not affect the phenotype: H A C E F G Q R S\n", "Mutations causing small effects on phenotype: \n", "Mutations causing dramatic effects on phenotype: Y L K P\n", "\n", "L114: group 7\n", "Buried, intolerant to substitutions (I-)\n", "\n", "Mutations that do not affect the phenotype: H A C G Q S\n", "Mutations causing small effects on phenotype: Y F\n", "Mutations causing dramatic effects on phenotype: E K P R\n", "\n", "Heatmap\n", "Darker colors indicate increased disruption to phenotype by mutation.\n", "White boxes - WT residue.\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "peptide2_range = range(int(peptide2_firstres), int(peptide2_lastres) + 1, 1)\n", "\n", "print('''\n", "-------- Mutational phenotype data from Markiewicz et al., J. Mol Biol., 1994 --------\n", "''')\n", "print('Peptide 2: ' + peptide_input2)\n", "\n", "peptide_index = 0\n", "res_label_value = []\n", "AA_value = []\n", "heatmap_value = []\n", "\n", "aa = list(\"YLHACDEFGIKMNPQRSTVW\")\n", "\n", "for residue in peptide2_range:\n", " no_effect = []\n", " small_effect = []\n", " dramatic_effect = []\n", " \n", " # print MH group\n", " for key, value in MH_dict.items(): \n", " if residue == key:\n", " peptide_index = peptide_index + 1\n", " res_label = str(peptide_input2[peptide_index-1]) + str(residue)\n", " print('\\n' + res_label + ': group ' + value)\n", " # print MH group description\n", " for second_key, second_value in MHdescriptions_dict.items():\n", " if value == second_key:\n", " print(second_value)\n", " \n", " # group and print phenotype effects of specific point mutations\n", " for AA in aa:\n", " for key, value in mutation_dict.items():\n", " if key[-1] == AA:\n", " if res_label in key:\n", " if value == '+':\n", " no_effect.append(key[-1])\n", " heatmap_value.append(1)\n", " AA_value.append(AA)\n", " res_label_value.append(residue)\n", " elif value == ('+-'):\n", " small_effect.append(key[-1])\n", " heatmap_value.append(0.25)\n", " AA_value.append(AA)\n", " res_label_value.append(residue)\n", " elif value == ('-+'):\n", " small_effect.append(key[-1])\n", " heatmap_value.append(0.75)\n", " AA_value.append(AA)\n", " res_label_value.append(residue)\n", " elif value == '-':\n", " dramatic_effect.append(key[-1])\n", " heatmap_value.append(0)\n", " AA_value.append(AA)\n", " res_label_value.append(residue)\n", "\n", " print('Mutations that do not affect the phenotype: ' + \" \".join(x for x in no_effect))\n", " print('Mutations causing small effects on phenotype: ' + \" \".join(x for x in small_effect))\n", " print('Mutations causing dramatic effects on phenotype: ' + \" \".join(x for x in dramatic_effect))\n", "\n", "heatmap_df = pd.DataFrame({'Phenotype': AA_value, 'Residue': res_label_value, 'Sensitivity': heatmap_value })\n", "# plot it\n", "print('\\nHeatmap'\n", " '\\nDarker colors indicate increased disruption to phenotype by mutation.'\n", " '\\nWhite boxes - WT residue.')\n", "plt.rcParams['figure.figsize'] = (10.0, 8.0)\n", "plt.rcParams['font.size'] = 18\n", "plt.rcParams['font.family'] = 'Arial'\n", "df_wide=heatmap_df.pivot_table( index='Residue', columns='Phenotype', values='Sensitivity' )\n", "p2=sns.heatmap( df_wide, cmap=\"YlGn_r\" )\n", "p2.set_title(peptide2_firstres + ' - ' + peptide_input2)\n", "plt.show()" ] } ], "metadata": { "celltoolbar": "Tags", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }