''' Described at PyMOL wiki: http://www.pymolwiki.org/index.php/displacementmap Thx for inspiration from Andreas Henschel http://www.mail-archive.com/pymol-users@lists.sourceforge.net/msg05595.html (17 dec 2010) And from Simple scriptin PymMOl http://www.pymolwiki.org/index.php/Simple_Scripting Troels Linnet, 2010-12-18. Calculates the distance for example between all CA atoms between a closed and open form of a structure. Give a data matrix and a gnuplot file, and input to pymol for easy visualisation Possible so select interesting residues in ranges. Needs to be separated with a dot '.' Example input from pymol. with 2 objects. dispmap O5NT-1HP1-A, C5NT-1HPU-C, 30.0, 15.0, resi1=23-25, atom=CA, showsticks=yes ''' from __future__ import print_function from pymol import cmd, stored, selector from math import * import os import re def dispmap(molecule1="NIL", molecule2="NIL", mindist=30.0, mindelta=15.0, resi1=str(0), resi2=str(0), atom='CA', listlength=5, showsticks='yes'): if molecule1 == "NIL": assert len(cmd.get_names()) != 0, "Did you forget to load a molecule? There are no objects in pymol." molecule1 = cmd.get_names()[0] if molecule2 == "NIL": assert len(cmd.get_names()) != 0, "Did you forget to load a molecule? There are no objects in pymol." molecule2 = cmd.get_names()[1] print("You passed in %s and %s" % (molecule1, molecule2)) # Open filenames filename = str(molecule1) + "-" + str(molecule2) + "-" + str(atom) + "-dist" backbonefilename = str(molecule1) + "-" + str(molecule2) + "-" + str(atom) + "backbone-dist.txt" outfile = open(filename + ".txt", "w") backboneoutfile = open(filename + "-backbone.txt", "w") gnuoutfile = open(filename + ".plt", "w") print(("I have opened matrix %s for you\n" % (filename + ".txt"))) # Sorting for interesting residues for Obj1 and Obj2. # Input is a string, and need to be sorted. resi1 = resi1.split('.') resi2 = resi2.split('.') resi1List = [] resi2List = [] for i in resi1: if '-' in i: tmp = i.split('-') resi1List.extend(list(range(int(tmp[0]), int(tmp[-1]) + 1))) if '-' not in i: resi1List.append(int(i)) for i in resi2: if '-' in i: tmp = i.split('-') resi2List.extend(list(range(int(tmp[0]), int(tmp[-1]) + 1))) if '-' not in i: resi2List.append(int(i)) resi1List.sort() resi2List.sort() # Only take the lines where atom is specified in input Object3 = molecule1 + " and name " + str(atom) Object4 = molecule2 + " and name " + str(atom) # Open 2 name lists # Append residue and atom name to the lists stored.OpenPDB = [] stored.ClosedPDB = [] cmd.iterate(Object3, "stored.OpenPDB.append((resi, name, resn))") cmd.iterate(Object4, "stored.ClosedPDB.append((resi, name, resn))") # Open 2 x,y,z position lists # Append atom position stored.OpenPos = [] stored.ClosedPos = [] cmd.iterate_state(1, selector.process(Object3), "stored.OpenPos.append((x,y,z))") cmd.iterate_state(1, selector.process(Object4), "stored.ClosedPos.append((x,y,z))") # Sometimes residues gets skipped in X-ray crys, because of low signal or sim. This leads to number conflict. # Make ordered lists according to residue number. Find largest residue number via -1 OpenOrderedPDB = [] ClosedOrderedPDB = [] OpenOrderedPos = [] ClosedOrderedPos = [] BackboneDisp = [] # First fill lists with zeros for i in range(int(stored.OpenPDB[-1][0]) + 1): OpenOrderedPDB.append([0, 0, 0]) for i in range(int(stored.ClosedPDB[-1][0]) + 1): ClosedOrderedPDB.append([0, 0, 0]) for i in range(int(stored.OpenPDB[-1][0]) + 1): OpenOrderedPos.append((0, 0, 0)) for i in range(int(stored.ClosedPDB[-1][0]) + 1): ClosedOrderedPos.append((0, 0, 0)) for i in range(int(stored.OpenPDB[-1][0]) + 1): BackboneDisp.append([i, 0, "NIL", atom]) # Fill in data the right places j = 0 for i in stored.OpenPDB: OpenOrderedPDB[int(i[0])] = [int(i[0]), i[1], i[2]] OpenOrderedPos[int(i[0])] = stored.OpenPos[j] j = j + 1 j = 0 for i in stored.ClosedPDB: ClosedOrderedPDB[int(i[0])] = [int(i[0]), i[1], i[2]] ClosedOrderedPos[int(i[0])] = stored.ClosedPos[j] j = j + 1 # Make a list with the missing residues MissingRes = [] for index, resi in enumerate(OpenOrderedPDB): if abs(OpenOrderedPDB[index][0] - ClosedOrderedPDB[index][0]) != 0: MissingRes.append(abs(OpenOrderedPDB[index][0] - ClosedOrderedPDB[index][0])) print("Following residues miss in one of the files, and are discarded for") print("further calculations") print(MissingRes) print("") # Make the data matrix CalcMatrix = create_nXn_matrix(len(OpenOrderedPos)) print(("Calculating a %s X %s distance Matrix" % (len(OpenOrderedPos), len(ClosedOrderedPos)))) # Make a list with 10 most negative/positive distances MaxNegDist = [] MaxPosDist = [] for i in range(int(listlength)): MaxNegDist.append([0, 0, 0, 0, 0, 0, 0]) MaxPosDist.append([0, 0, 0, 0, 0, 0, 0]) # Calculate distances for i in range(len(OpenOrderedPos)): for j in range(len(ClosedOrderedPos)): if OpenOrderedPos[i][0] != 0 and ClosedOrderedPos[j][0] != 0 and OpenOrderedPDB[i][0] not in MissingRes and ClosedOrderedPDB[j][0] not in MissingRes: distOpenOpen = distance(OpenOrderedPos, OpenOrderedPos, i, j) distClosedClosed = distance(ClosedOrderedPos, ClosedOrderedPos, i, j) distOpenClosed = distance(OpenOrderedPos, ClosedOrderedPos, i, j) DeltaDist = distOpenClosed - distOpenOpen if i == j: BackboneDisp[i] = [i, DeltaDist, OpenOrderedPDB[i][2], atom] # Test if distance is larger than threshold if distOpenOpen >= float(mindist) and distClosedClosed >= float(mindist) and abs(DeltaDist) >= float(mindelta): CalcMatrix[i][j] = str(round(DeltaDist, 0)) if DeltaDist < 0 and DeltaDist < MaxNegDist[-1][0] and (i in resi1List or resi1List[-1] == 0) and (j in resi2List or resi2List[-1] == 0): MaxNegDist[-1][0] = DeltaDist MaxNegDist[-1][1] = i MaxNegDist[-1][2] = j MaxNegDist[-1][3] = distOpenOpen MaxNegDist[-1][4] = distOpenClosed MaxNegDist[-1][5] = str(OpenOrderedPDB[i][2]) MaxNegDist[-1][6] = str(ClosedOrderedPDB[j][2]) MaxNegDist = sorted(MaxNegDist) if DeltaDist > 0 and DeltaDist > MaxPosDist[-1][0] and (i in resi1List or resi1List[-1] == 0) and (j in resi2List or resi2List[-1] == 0): MaxPosDist[-1][0] = DeltaDist MaxPosDist[-1][1] = i MaxPosDist[-1][2] = j MaxPosDist[-1][3] = distOpenOpen MaxPosDist[-1][4] = distOpenClosed MaxPosDist[-1][5] = str(OpenOrderedPDB[i][2]) MaxPosDist[-1][6] = str(ClosedOrderedPDB[j][2]) MaxPosDist = sorted(MaxPosDist, reverse=True) print("I made a datamatrix and backbone.txt file for you") print(("matrix: %s backbone: %s" % (filename + ".txt", filename + "-backbone.txt"))) print("I made a gnuplot file for you, to view the datamatrix and the backbone displacement") print(("filename: %s\n" % (filename + ".plt"))) # Print distance matrix line01 = "# Input 1: %s and Input 2: %s" % (molecule1, molecule2) line02 = "# Find for: %s with min. residue-residue dist: %s Angstrom" % (atom, mindist) line03 = "# Looking for min. displacement dist: %s Angstrom" % (mindelta) line04 = "# I give nr# suggestions: %s, and do I show sticks in pymol?: %s" % (listlength, showsticks) line05 = "# I look for suggestions in the range: ([0]=>means all residues)" line06 = "# for Input 1: %s and for Input 2: %s" % (resi1, resi2) line07 = "# Mutation info is BLOSUM62 log-odds likelihood score and PAM250 is probability in % for evolutionary distance" line08 = "###########################################################################################################" line09 = "# Max Negative and positive distances # Mutation info #" line10 = "# Obj.1 Obj.2 Delta Op-Op Cl-Cl # Obj.1 Obj.2 Delta Op-Op Cl-Cl # Res.1 Res.2 # Res.1 Res.2 #" line11 = "# Res.1 Res.2 -Dist Dist Dist # Res.1 Res.2 +Dist Dist Dist # B62/PAM250% # B62/PAM250% #" outfile.write(line01 + '\n') outfile.write(line02 + '\n') outfile.write(line03 + '\n') outfile.write(line04 + '\n') outfile.write(line05 + '\n') outfile.write(line06 + '\n') outfile.write(line07 + '\n') outfile.write(line08 + '\n') outfile.write(line09 + '\n') outfile.write(line08 + '\n') outfile.write(line10 + '\n') outfile.write(line11 + '\n') outfile.write(line08 + '\n') print(line01) print(line02) print(line03) print(line04) print(line05) print(line06) print(line07) print(line08) print(line09) print(line08) print(line10) print(line11) print(line08) for i in range(len(MaxNegDist)): text = "# %3s%3s %3s%3s %5s %5s %5s # %3s%3s %3s%3s %5s %5s %5s # %2s/%2s %2s/%2s # %2s/%2s %2s/%2s #" % (MaxNegDist[i][5], MaxNegDist[i][1], MaxNegDist[i][6], MaxNegDist[i][2], round(MaxNegDist[i][0], 1), round(MaxNegDist[i][3], 1), round(MaxNegDist[i][4], 1), MaxPosDist[i][5], MaxPosDist[i][1], MaxPosDist[i][6], MaxPosDist[i][2], round(MaxPosDist[i][0], 1), round(MaxPosDist[i][3], 1), round(MaxPosDist[i][4], 1), cysb62(shortaa(str(MaxNegDist[i][5]))), pam250(shortaa(str(MaxNegDist[i][5]))), cysb62(shortaa(str(MaxNegDist[i][6]))), pam250(shortaa(str(MaxNegDist[i][6]))), cysb62(shortaa(str(MaxPosDist[i][5]))), pam250(shortaa(str(MaxPosDist[i][5]))), cysb62(shortaa(str(MaxPosDist[i][6]))), pam250(shortaa(str(MaxPosDist[i][6])))) outfile.write(text + '\n') print(text) for i in range(len(CalcMatrix)): writing = "" for j in range(len(CalcMatrix)): if str(CalcMatrix[i][j]) == "0.0": writing = writing + " " + "?" else: writing = writing + " " + str(CalcMatrix[i][j]) # Add break line writing = writing + " " + "\n" outfile.write(writing) outfile.close() for i in range(len(BackboneDisp)): line = "%3s %4s %3s %3s" % (BackboneDisp[i][0], round(BackboneDisp[i][1], 1), BackboneDisp[i][2], BackboneDisp[i][3]) backboneoutfile.write(line + '\n') backboneoutfile.close() # Make gnuplot plot file gnuoutfile.write("reset" + "\n") gnuoutfile.write("cd " + "'" + os.getcwd() + "'" + "\n") gnuoutfile.write("\n") gnuoutfile.write("#Title hacks \\n is newline, and 0,1 is x,y offset adjustment" + "\n") gnuoutfile.write('set title "Protein ' + str(atom) + ' Displacement matrix map \\n ResRes min. ' + str(mindist) + ' Ang, ' + 'Delta min. ' + str(mindelta) + ' Ang"' + "\n") gnuoutfile.write("# x is column" + "\n") gnuoutfile.write("set xlabel 'Res nr. for " + str(molecule2) + "'" + "\n") gnuoutfile.write("# y is row" + "\n") gnuoutfile.write("set ylabel 'Res nr. for " + str(molecule1) + "'" + "\n") gnuoutfile.write("\n") gnuoutfile.write("#set xrange [300:550]; set yrange [0:400]" + "\n") gnuoutfile.write("#set xtics 50" + "\n") gnuoutfile.write("#set ytics 50" + "\n") gnuoutfile.write("#set mxtics 5" + "\n") gnuoutfile.write("#set mytics 5" + "\n") gnuoutfile.write("set size ratio 1" + "\n") gnuoutfile.write("unset key" + "\n") gnuoutfile.write("\n") gnuoutfile.write("set cbrange [-30:30]" + "\n") gnuoutfile.write("set palette defined (-30 'blue', 0 'white', 30 'red')" + "\n") gnuoutfile.write("set pm3d map" + "\n") gnuoutfile.write("\n") gnuoutfile.write("#set term postscript eps enhanced color" + "\n") gnuoutfile.write('#set output "' + filename + '.eps"' + "\n") gnuoutfile.write("set term png" + "\n") gnuoutfile.write('set output "' + filename + '.png"' + "\n") gnuoutfile.write("splot '" + str(filename + ".txt") + "' matrix" + "\n") gnuoutfile.write("\n") gnuoutfile.write("#For the backbone displacement" + "\n") gnuoutfile.write("\n") gnuoutfile.write('set title "Protein ' + str(atom) + ' Backbone displacement"' + "\n") gnuoutfile.write("set xlabel 'Residue number'" + "\n") gnuoutfile.write("set ylabel '" + str(atom) + " displacement (Ang)'" + "\n") gnuoutfile.write("\n") gnuoutfile.write("#set xrange [0:550]; set yrange [0:40]" + "\n") gnuoutfile.write("#set xtics 50" + "\n") gnuoutfile.write("#set ytics 10" + "\n") gnuoutfile.write("#set mxtics 5" + "\n") gnuoutfile.write("#set mytics 5" + "\n") gnuoutfile.write("set size ratio 0.75" + "\n") gnuoutfile.write("unset key" + "\n") gnuoutfile.write("\n") gnuoutfile.write("#set term postscript eps enhanced color" + "\n") gnuoutfile.write('#set output "' + filename + '-backbone.eps"' + "\n") gnuoutfile.write("set term png" + "\n") gnuoutfile.write('set output "' + filename + '-backbone.png"' + "\n") gnuoutfile.write("plot '" + str(filename + "-backbone.txt") + "' using 1:2 title 'Backbone displacement' with lines" + "\n") gnuoutfile.close() # Create stick residue objects for i in range(len(MaxNegDist)): name = str(i) + "_" + str(round(MaxNegDist[i][0], 1)) + "_" + shortaa(str(MaxNegDist[i][5])) + str(MaxNegDist[i][1]) + shortaa(str(MaxNegDist[i][6])) + str(MaxNegDist[i][2]) selection = str(molecule1) + " and resi " + str(MaxNegDist[i][1]) + "+" + str(MaxNegDist[i][2]) + " or " + str(molecule2) + " and resi " + str(MaxNegDist[i][2]) # print selection cmd.create(name, selection) if showsticks == 'yes' or showsticks == 'y': cmd.show("sticks", name) for i in range(len(MaxPosDist)): name = str(i) + "_" + str(round(MaxPosDist[i][0], 1)) + "_" + shortaa(str(MaxPosDist[i][5])) + str(MaxPosDist[i][1]) + shortaa(str(MaxPosDist[i][6])) + str(MaxPosDist[i][2]) selection = str(molecule1) + " and resi " + str(MaxPosDist[i][1]) + "+" + str(MaxPosDist[i][2]) + " or " + str(molecule2) + " and resi " + str(MaxPosDist[i][2]) # print selection cmd.create(name, selection) if showsticks == 'yes' or showsticks == 'y': cmd.show("sticks", name) cmd.extend("dispmap", dispmap) def create_nXn_matrix(n): return [[0.0 for x in range(n)] for x in range(n)] def distance(array1, array2, i, j): i = int(i) j = int(j) dist = sqrt((array1[i][0] - array2[j][0]) ** 2 + (array1[i][1] - array2[j][1]) ** 2 + (array1[i][2] - array2[j][2]) ** 2) return dist def Coord(Input): print(cmd.get_atom_coords(Input)) cmd.extend("Coord", Coord) def replace_words(text, word_dic): rc = re.compile('|'.join(map(re.escape, word_dic))) def translate(match): return word_dic[match.group(0)] return rc.sub(translate, text) def shortaa(longaa): aa_dic = {'ARG': 'R', 'HIS': 'H', 'LYS': 'K', 'ASP': 'D', 'GLU': 'E', 'SER': 'S', 'THR': 'T', 'ASN': 'N', 'GLN': 'Q', 'CYS': 'C', 'SEC': 'U', 'GLY': 'G', 'PRO': 'P', 'ALA': 'A', 'ILE': 'I', 'LEU': 'L', 'MET': 'M', 'PHE': 'F', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'} return(replace_words(longaa, aa_dic)) def cysb62(aa): # BLOSUM62 cys mutation # C S T P A G N D E Q H R K M I L V F Y W # C9 -1 -1 -3 0 -3 -3 -3 -4 -3 -3 -3 -3 -1 -1 -1 -1 -2 -2 -2 b62_dic = {'R': '-3', 'H': '-3', 'K': '-1', 'D': '-3', 'E': '-4', 'S': '-1', 'T': '-1', 'N': '-3', 'Q': '-3', 'C': '9', 'U': '9', 'G': '-3', 'P': '-3', 'A': '0', 'I': '-1', 'L': '-1', 'M': '-1', 'F': '-2', 'W': '-2', 'Y': '-2', 'V': '-1'} return(replace_words(aa, b62_dic)) def pam250(aa): # A R N D C Q E G H I L K M F P S T W Y V # C 2 1 1 1 52 1 1 2 2 2 1 1 1 1 2 3 2 1 4 2 # Mutation probability matrix for the evolutionary distance of 250 PAMs. # To simplify the appearance, the elements are shown multiplied by 100. # In comparing two sequences of average amino acid frequency at this evolutionary distance, # there is a 13% probability that a position containing Ala in the first sequence will contain Ala in the second. # There is a 3% chance that it will contain Arg, and so forth. # (Adapted from Figure 83. Atlas of Protein Sequence and Structure, Suppl 3, 1978, M.O. Dayhoff, ed. National Biomedical Research Foundation, 1979.) pam250_dic = {'R': '1', 'H': '2', 'K': '1', 'D': '1', 'E': '1', 'S': '3', 'T': '2', 'N': '1', 'Q': '1', 'C': '52', 'U': '52', 'G': '2', 'P': '2', 'A': '2', 'I': '2', 'L': '1', 'M': '1', 'F': '1', 'W': '1', 'Y': '4', 'V': '2'} return(replace_words(aa, pam250_dic))