{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Creation of RNA alignments from databases" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AP006878.1/1107433-1107263\tGCCGACAGCGAGGGGACAGUCAGUCUCCUCGCA-GGGCUCGGUACAACCGCCUCCGCAAG\n", "BA000001.2/1531413-1531580\tGCCGACAGCGAGGGUACAAUA----CCCUCGCAAGGGCUCGGUCAACCCGCCCCCGCAAG\n", "AF468962.1/1-168\tGCCGACAGCGAGGGUACAAUA----CCCUCGCAAGGGCUCGGUCAACCCGCCCCCGCAAG\n", "AJ248284.1/11131-11298\tGCCGACAGCGAGGGUACAAU-----CCCUCGCAGGGGCUCGGUCUACCCGCCCCCGCAAG\n", "\n", "AP006878.1/1107433-1107263\tGU-AUCGGGUUCCGUGAGCGGAGCGUGCUCACGCCGAGCCCACAGGGCCGGGAGCAUCCA\n", "BA000001.2/1531413-1531580\tGU-UUCGGGGUCGAUGAGCGGGGUGUGCUCACGCCGAGCCUACAGGGCCGG-UGCAUCCG\n", "AF468962.1/1-168\tGU-UUCGGGUUCGAUGAGCGGGGUGUGCUCACGCCGAGCCUACAGGGCCGG-UGCAUCCG\n", "AJ248284.1/11131-11298\tGUGUUCGGGUUCGAUGAGCGGGGUGUGCUCACGCCGAGCCCACAGGGCCGG-UGCAUCCG\n", "\n", "AP006878.1/1107433-1107263\tCCCGCGGGAGCAGUGACCGC-GGGCCUCUGUACCCGGCCCACAUUUCGAUGCCC\n", "BA000001.2/1531413-1531580\tCCCGCGGGAUUAAUGACCGCUGGGUCUCUGUUGCCGGCCCACAAUAAAGUUUAA\n", "AF468962.1/1-168\tCCCGCGGGAUCAAUGACCGCUGGGUCUCUGUUGCCGGCCCACAAAAGUAUUUUA\n", "AJ248284.1/11131-11298\tCCCGCGGGAUCAAUGACCGCCGGGUCUCUGUUGCCGGCCUACAGCAAAAAUUUA\n", "\n", "2D\t.......(((((((...........)))))))..((((((((....(((((.((((.........))))........)))))...........))))))))...(((((((.........(((((((.........)))).))).........)))))))..............\n" ] } ], "source": [ "from pyrna.db import Rfam\n", "# Nothing in this notebook imported the two helpers below, so they only resolved if the kernel had them preloaded.\n", "# NOTE(review): both are assumed to be exported by pyrna.parsers -- confirm against the installed PyRNA.\n", "from pyrna.parsers import consensus2d_to_booquet, to_clustalw\n", "\n", "rfam = Rfam(use_website = True)\n", "# The result of get_entry is unpacked as (aligned RNAs, species, consensus secondary structure)\n", "aligned_rnas, species, consensus2D = rfam.get_entry(rfam_id = 'RF00058', nse_labels = 0)\n", "\n", "structural_alignment = to_clustalw(consensus2D, aligned_rnas)\n", "# Parenthesised call: prints the same text under Python 2 and still parses under Python 3\n", "print(structural_alignment)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'diagonals': [],\n", " 'directly-linked-helices': 
[],\n", " 'helices': [{'coords': [[0.0, 200], [0.0, 140]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'GCGAGGG', 'CCCUCGC'],\n", " ['AJ248284.1/11131-11298', 'GCGAGGG', 'CCCUCGC'],\n", " ['AF468962.1/1-168', 'GCGAGGG', 'CCCUCGC'],\n", " ['AP006878.1/1107433-1107263', 'GCGAGGG', 'UCCUCGC']],\n", " 'location': [[8, 14], [26, 32]],\n", " 'name': 'H1',\n", " 'quantitative_value': 0.0},\n", " {'coords': [[40.0, 200], [40.0, 130]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'GGGCUCGG', 'CCGAGCCU'],\n", " ['AJ248284.1/11131-11298', 'GGGCUCGG', 'CCGAGCCC'],\n", " ['AF468962.1/1-168', 'GGGCUCGG', 'CCGAGCCU'],\n", " ['AP006878.1/1107433-1107263', 'GGGCUCGG', 'CCGAGCCC']],\n", " 'location': [[35, 42], [94, 101]],\n", " 'name': 'H2',\n", " 'quantitative_value': 0.0},\n", " {'coords': [[40.0, 100.0], [40.0, 60.0]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'CCCGC', 'GCGGG'],\n", " ['AJ248284.1/11131-11298', 'CCCGC', 'GCGGG'],\n", " ['AF468962.1/1-168', 'CCCGC', 'GCGGG'],\n", " ['AP006878.1/1107433-1107263', 'ACCGC', 'GCGGA']],\n", " 'location': [[47, 51], [78, 82]],\n", " 'name': 'H3',\n", " 'quantitative_value': 0.0},\n", " {'coords': [[40.0, 30.0], [40.0, 0.0]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'CCCG', 'CGGG'],\n", " ['AJ248284.1/11131-11298', 'CCCG', 'CGGG'],\n", " ['AF468962.1/1-168', 'CCCG', 'CGGG'],\n", " ['AP006878.1/1107433-1107263', 'UCCG', 'CGGG']],\n", " 'location': [[53, 56], [66, 69]],\n", " 'name': 'H4',\n", " 'quantitative_value': 0.0},\n", " {'coords': [[80.0, 200], [80.0, 140]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'GGGCCGG', 'CCGGCCC'],\n", " ['AJ248284.1/11131-11298', 'GGGCCGG', 'CCGGCCU'],\n", " ['AF468962.1/1-168', 'GGGCCGG', 'CCGGCCC'],\n", " ['AP006878.1/1107433-1107263', 'GGGCCGG', 'CCGGCCC']],\n", " 'location': [[105, 111], [154, 160]],\n", " 'name': 'H5',\n", " 'quantitative_value': 0.0},\n", " {'coords': [[80.0, 110.0], [80.0, 90.0]],\n", " 'descriptions': 
[['BA000001.2/1531413-1531580', 'CCC', 'GGG'],\n", " ['AJ248284.1/11131-11298', 'CCC', 'GGG'],\n", " ['AF468962.1/1-168', 'CCC', 'GGG'],\n", " ['AP006878.1/1107433-1107263', 'CCC', 'GGG']],\n", " 'location': [[121, 123], [142, 144]],\n", " 'name': 'H6',\n", " 'quantitative_value': 0.0},\n", " {'coords': [[80.0, 60.0], [80.0, 30.0]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'GCGG', 'CCGC'],\n", " ['AJ248284.1/11131-11298', 'GCGG', 'CCGC'],\n", " ['AF468962.1/1-168', 'GCGG', 'CCGC'],\n", " ['AP006878.1/1107433-1107263', 'GCGG', 'CCGC']],\n", " 'location': [[124, 127], [137, 140]],\n", " 'name': 'H7',\n", " 'quantitative_value': 0.0}],\n", " 'junctions': [{'coords': [[0.0, 125]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'UACAAUA'],\n", " ['AJ248284.1/11131-11298', 'UACAAU'],\n", " ['AF468962.1/1-168', 'UACAAUA'],\n", " ['AP006878.1/1107433-1107263', 'GACAGUCAGUC']],\n", " 'location': [[14, 26]],\n", " 'quantitative_value': 1.920286436967152},\n", " {'coords': [[40.0, 115]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'UCAA', 'GUGUGCUCACG'],\n", " ['AJ248284.1/11131-11298', 'UCUA', 'GUGUGCUCACG'],\n", " ['AF468962.1/1-168', 'UCAA', 'GUGUGCUCACG'],\n", " ['AP006878.1/1107433-1107263', 'UACA', 'GCGUGCUCACG']],\n", " 'location': [[42, 47], [82, 94]],\n", " 'quantitative_value': 0.0},\n", " {'coords': [[40.0, 45.0]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'C', 'GUCGAUGA'],\n", " ['AJ248284.1/11131-11298', 'C', 'UUCGAUGA'],\n", " ['AF468962.1/1-168', 'C', 'UUCGAUGA'],\n", " ['AP006878.1/1107433-1107263', 'C', 'UUCCGUGA']],\n", " 'location': [[51, 53], [69, 78]],\n", " 'quantitative_value': 0.0},\n", " {'coords': [[40.0, -15.0]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'CAAGGUUU'],\n", " ['AJ248284.1/11131-11298', 'CAAGGUGUU'],\n", " ['AF468962.1/1-168', 'CAAGGUUU'],\n", " ['AP006878.1/1107433-1107263', 'CAAGGUAU']],\n", " 'location': [[56, 66]],\n", " 'quantitative_value': 0.4330127018922193},\n", " {'coords': 
[[80.0, 125]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'UGCAUCCG', 'UCUCUGUUG'],\n", " ['AJ248284.1/11131-11298', 'UGCAUCCG', 'UCUCUGUUG'],\n", " ['AF468962.1/1-168', 'UGCAUCCG', 'UCUCUGUUG'],\n", " ['AP006878.1/1107433-1107263', 'GAGCAUCCA', 'CCUCUGUAC']],\n", " 'location': [[111, 121], [144, 154]],\n", " 'quantitative_value': 0.4330127018922193},\n", " {'coords': [[80.0, 15.0]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'GAUUAAUGA'],\n", " ['AJ248284.1/11131-11298', 'GAUCAAUGA'],\n", " ['AF468962.1/1-168', 'GAUCAAUGA'],\n", " ['AP006878.1/1107433-1107263', 'GAGCAGUGA']],\n", " 'location': [[127, 137]],\n", " 'quantitative_value': 0.0},\n", " {'coords': [[80.0, 75.0]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', '', 'U'],\n", " ['AJ248284.1/11131-11298', '', 'C'],\n", " ['AF468962.1/1-168', '', 'U'],\n", " ['AP006878.1/1107433-1107263', '', '']],\n", " 'location': [[123, 124], [140, 142]],\n", " 'quantitative_value': 0.4330127018922193}],\n", " 'single_strands': [{'coords': [[-40.0, 200], [0.0, 200]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'GCCGACA'],\n", " ['AJ248284.1/11131-11298', 'GCCGACA'],\n", " ['AF468962.1/1-168', 'GCCGACA'],\n", " ['AP006878.1/1107433-1107263', 'GCCGACA']],\n", " 'location': [1, 7],\n", " 'name': 'SS1',\n", " 'quantitative_value': 0.0},\n", " {'descriptions': [['BA000001.2/1531413-1531580', 'UACAAUA'],\n", " ['AJ248284.1/11131-11298', 'UACAAU'],\n", " ['AF468962.1/1-168', 'UACAAUA'],\n", " ['AP006878.1/1107433-1107263', 'GACAGUCAGUC']],\n", " 'location': [15, 25],\n", " 'name': 'SS2',\n", " 'quantitative_value': 1.920286436967152},\n", " {'coords': [[0.0, 200], [40.0, 200]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'AA'],\n", " ['AJ248284.1/11131-11298', 'AG'],\n", " ['AF468962.1/1-168', 'AA'],\n", " ['AP006878.1/1107433-1107263', 'A']],\n", " 'location': [33, 34],\n", " 'name': 'SS3',\n", " 'quantitative_value': 0.4330127018922193},\n", " {'descriptions': 
[['BA000001.2/1531413-1531580', 'UCAA'],\n", " ['AJ248284.1/11131-11298', 'UCUA'],\n", " ['AF468962.1/1-168', 'UCAA'],\n", " ['AP006878.1/1107433-1107263', 'UACA']],\n", " 'location': [43, 46],\n", " 'name': 'SS4',\n", " 'quantitative_value': 0.0},\n", " {'descriptions': [['BA000001.2/1531413-1531580', 'C'],\n", " ['AJ248284.1/11131-11298', 'C'],\n", " ['AF468962.1/1-168', 'C'],\n", " ['AP006878.1/1107433-1107263', 'C']],\n", " 'location': [52, 52],\n", " 'name': 'SS5',\n", " 'quantitative_value': 0.0},\n", " {'descriptions': [['BA000001.2/1531413-1531580', 'CAAGGUUU'],\n", " ['AJ248284.1/11131-11298', 'CAAGGUGUU'],\n", " ['AF468962.1/1-168', 'CAAGGUUU'],\n", " ['AP006878.1/1107433-1107263', 'CAAGGUAU']],\n", " 'location': [57, 65],\n", " 'name': 'SS6',\n", " 'quantitative_value': 0.4330127018922193},\n", " {'descriptions': [['BA000001.2/1531413-1531580', 'GUCGAUGA'],\n", " ['AJ248284.1/11131-11298', 'UUCGAUGA'],\n", " ['AF468962.1/1-168', 'UUCGAUGA'],\n", " ['AP006878.1/1107433-1107263', 'UUCCGUGA']],\n", " 'location': [70, 77],\n", " 'name': 'SS7',\n", " 'quantitative_value': 0.0},\n", " {'descriptions': [['BA000001.2/1531413-1531580', 'GUGUGCUCACG'],\n", " ['AJ248284.1/11131-11298', 'GUGUGCUCACG'],\n", " ['AF468962.1/1-168', 'GUGUGCUCACG'],\n", " ['AP006878.1/1107433-1107263', 'GCGUGCUCACG']],\n", " 'location': [83, 93],\n", " 'name': 'SS8',\n", " 'quantitative_value': 0.0},\n", " {'coords': [[40.0, 200], [80.0, 200]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'ACA'],\n", " ['AJ248284.1/11131-11298', 'ACA'],\n", " ['AF468962.1/1-168', 'ACA'],\n", " ['AP006878.1/1107433-1107263', 'ACA']],\n", " 'location': [102, 104],\n", " 'name': 'SS9',\n", " 'quantitative_value': 0.0},\n", " {'descriptions': [['BA000001.2/1531413-1531580', 'UGCAUCCG'],\n", " ['AJ248284.1/11131-11298', 'UGCAUCCG'],\n", " ['AF468962.1/1-168', 'UGCAUCCG'],\n", " ['AP006878.1/1107433-1107263', 'GAGCAUCCA']],\n", " 'location': [112, 120],\n", " 'name': 'SS10',\n", " 
'quantitative_value': 0.4330127018922193},\n", " {'descriptions': [['BA000001.2/1531413-1531580', 'GAUUAAUGA'],\n", " ['AJ248284.1/11131-11298', 'GAUCAAUGA'],\n", " ['AF468962.1/1-168', 'GAUCAAUGA'],\n", " ['AP006878.1/1107433-1107263', 'GAGCAGUGA']],\n", " 'location': [128, 136],\n", " 'name': 'SS11',\n", " 'quantitative_value': 0.0},\n", " {'descriptions': [['BA000001.2/1531413-1531580', 'U'],\n", " ['AJ248284.1/11131-11298', 'C'],\n", " ['AF468962.1/1-168', 'U'],\n", " ['AP006878.1/1107433-1107263', '']],\n", " 'location': [141, 141],\n", " 'name': 'SS12',\n", " 'quantitative_value': 0.4330127018922193},\n", " {'descriptions': [['BA000001.2/1531413-1531580', 'UCUCUGUUG'],\n", " ['AJ248284.1/11131-11298', 'UCUCUGUUG'],\n", " ['AF468962.1/1-168', 'UCUCUGUUG'],\n", " ['AP006878.1/1107433-1107263', 'CCUCUGUAC']],\n", " 'location': [145, 153],\n", " 'name': 'SS13',\n", " 'quantitative_value': 0.0},\n", " {'coords': [[80.0, 200], [120.0, 200]],\n", " 'descriptions': [['BA000001.2/1531413-1531580', 'ACAAUAAAGUUUAA'],\n", " ['AJ248284.1/11131-11298', 'ACAGCAAAAAUUUA'],\n", " ['AF468962.1/1-168', 'ACAAAAGUAUUUUA'],\n", " ['AP006878.1/1107433-1107263', 'ACAUUUCGAUGCCC']],\n", " 'location': [161, 174],\n", " 'name': 'SS14',\n", " 'quantitative_value': 0.0}]}" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pass the alignment text produced by to_clustalw in the previous cell to consensus2d_to_booquet.\n", "# The displayed result is a dict keyed by 'diagonals', 'directly-linked-helices', 'helices', 'junctions' and 'single_strands'.\n", "# NOTE(review): consensus2d_to_booquet is not imported in this cell -- it has to be in the kernel namespace already; confirm where it comes from.\n", "consensus2d_to_booquet(structural_alignment)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [Root]", "language": "python", "name": "Python [Root]" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 0 }