{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "d=pd.read_csv('histone_genes.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Histone type | \n", "Histone variant | \n", "HGNC symbol | \n", "NCBI gene ID | \n", "Ensembl gene ID | \n", "Expr. timing | \n", "Expr. pattern | \n", "Biotype | \n", "Bona fide canonical | \n", "PMIDs | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "H1 | \n", "H1.0 | \n", "H1-0 | \n", "3005 | \n", "ENSG00000189060 | \n", "RI | \n", "NaN | \n", "COD | \n", "NaN | \n", "26689747 | \n", "
| 1 | \n", "H1 | \n", "H1.1 | \n", "H1-1 | \n", "3024 | \n", "ENSG00000124610 | \n", "RD | \n", "NaN | \n", "COD | \n", "NaN | \n", "26689747 | \n", "
| 2 | \n", "H1 | \n", "H1.2 | \n", "H1-2 | \n", "3006 | \n", "ENSG00000187837 | \n", "Mixed | \n", "NaN | \n", "COD | \n", "NaN | \n", "26689747 | \n", "
| 3 | \n", "H1 | \n", "H1.3 | \n", "H1-3 | \n", "3007 | \n", "ENSG00000124575 | \n", "RD | \n", "NaN | \n", "COD | \n", "NaN | \n", "26689747 | \n", "
| 4 | \n", "H1 | \n", "H1.4 | \n", "H1-4 | \n", "3008 | \n", "ENSG00000168298 | \n", "RD | \n", "NaN | \n", "COD | \n", "NaN | \n", "26689747 | \n", "
| 5 | \n", "H1 | \n", "H1.5 | \n", "H1-5 | \n", "3009 | \n", "ENSG00000184357 | \n", "RD | \n", "NaN | \n", "COD | \n", "NaN | \n", "26689747 | \n", "
| 6 | \n", "H1 | \n", "TS H1.6 | \n", "H1-6 | \n", "3010 | \n", "ENSG00000187475 | \n", "RD | \n", "TS | \n", "COD | \n", "NaN | \n", "26689747 | \n", "
| 7 | \n", "H1 | \n", "TS H1.7 | \n", "H1-7 | \n", "341567 | \n", "ENSG00000187166 | \n", "RI | \n", "TS | \n", "COD | \n", "NaN | \n", "26689747 | \n", "
| 8 | \n", "H1 | \n", "OO H1.8 | \n", "H1-8 | \n", "132243 | \n", "ENSG00000178804 | \n", "RI | \n", "OO | \n", "COD | \n", "NaN | \n", "26689747 | \n", "
| 9 | \n", "H1 | \n", "TS H1.9(?) | \n", "H1-9P | \n", "373861 | \n", "ENSG00000188662 | \n", "RI | \n", "TS | \n", "COD | \n", "NaN | \n", "12920187 26689747 17852044 | \n", "
| 10 | \n", "H1 | \n", "H1.10 | \n", "H1-10 | \n", "8971 | \n", "ENSG00000184897 | \n", "RI | \n", "NaN | \n", "COD | \n", "NaN | \n", "26689747 | \n", "
| 11 | \n", "H1 | \n", "NaN | \n", "H1-12P | \n", "387325 | \n", "ENSG00000216331 | \n", "NaN | \n", "NaN | \n", "PS | \n", "NaN | \n", "NaN | \n", "
| 12 | \n", "H2A | \n", "TS H2A.1 | \n", "H2AC1 | \n", "221613 | \n", "ENSG00000164508 | \n", "Mixed | \n", "TS | \n", "COD | \n", "NaN | \n", "2011515 7068607 24506885 | \n", "
| 13 | \n", "H2A | \n", "NaN | \n", "H2AC2P | \n", "387319 | \n", "ENSG00000216436 | \n", "NaN | \n", "NaN | \n", "PS | \n", "canonical | \n", "12408966 25731851 | \n", "
| 14 | \n", "H2A | \n", "NaN | \n", "H2AC3P | \n", "85303 | \n", "ENSG00000242387 | \n", "NaN | \n", "NaN | \n", "PS | \n", "NaN | \n", "NaN | \n", "
| 15 | \n", "H2A | \n", "canonical H2A | \n", "H2AC4 | \n", "8335 | \n", "ENSG00000278463 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 16 | \n", "H2A | \n", "NaN | \n", "H2AC5P | \n", "10341 | \n", "ENSG00000234816 | \n", "NaN | \n", "NaN | \n", "PS | \n", "NaN | \n", "NaN | \n", "
| 17 | \n", "H2A | \n", "canonical H2A | \n", "H2AC6 | \n", "8334 | \n", "ENSG00000180573 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 18 | \n", "H2A | \n", "canonical H2A | \n", "H2AC7 | \n", "3013 | \n", "ENSG00000196866 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 19 | \n", "H2A | \n", "canonical H2A | \n", "H2AC8 | \n", "3012 | \n", "ENSG00000277075 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 20 | \n", "H2A | \n", "NaN | \n", "H2AC9P | \n", "387323 | \n", "ENSG00000218281 | \n", "NaN | \n", "NaN | \n", "PS | \n", "NaN | \n", "NaN | \n", "
| 21 | \n", "H2A | \n", "NaN | \n", "H2AC10P | \n", "8333 | \n", "ENSG00000218690 | \n", "NaN | \n", "NaN | \n", "PS | \n", "NaN | \n", "NaN | \n", "
| 22 | \n", "H2A | \n", "canonical H2A | \n", "H2AC11 | \n", "8969 | \n", "ENSG00000196787 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 23 | \n", "H2A | \n", "canonical H2A | \n", "H2AC12 | \n", "85235 | \n", "ENSG00000274997 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 24 | \n", "H2A | \n", "canonical H2A | \n", "H2AC13 | \n", "8329 | \n", "ENSG00000196747 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 25 | \n", "H2A | \n", "canonical H2A | \n", "H2AC14 | \n", "8331 | \n", "ENSG00000276368 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 26 | \n", "H2A | \n", "canonical H2A | \n", "H2AC15 | \n", "8330 | \n", "ENSG00000275221 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 27 | \n", "H2A | \n", "canonical H2A | \n", "H2AC16 | \n", "8332 | \n", "ENSG00000276903 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 28 | \n", "H2A | \n", "canonical H2A | \n", "H2AC17 | \n", "8336 | \n", "ENSG00000278677 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| 29 | \n", "H2A | \n", "canonical H2A | \n", "H2AC18 | \n", "8337 | \n", "ENSG00000203812 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 25731851 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 103 | \n", "H3 | \n", "canonical H3.2 | \n", "H3C14 | \n", "126961 | \n", "ENSG00000203811 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 104 | \n", "H3 | \n", "canonical H3.1 | \n", "H3C15 | \n", "333932 | \n", "ENSG00000203852 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 105 | \n", "H3 | \n", "H3.Y.1 | \n", "H3Y1 | \n", "391769 | \n", "ENSG00000269466 | \n", "RI | \n", "NaN | \n", "COD | \n", "NaN | \n", "20819935 | \n", "
| 106 | \n", "H3 | \n", "H3.Y.2 | \n", "H3Y2 | \n", "340096 | \n", "ENSG00000268799 | \n", "RI | \n", "NaN | \n", "COD | \n", "NaN | \n", "20819935 | \n", "
| 107 | \n", "H3 | \n", "canonical H3(?) | \n", "H3-2 | \n", "440686 | \n", "ENSG00000273213 | \n", "RD | \n", "NaN | \n", "COD | \n", "NaN | \n", "12408966 | \n", "
| 108 | \n", "H3 | \n", "H3.3 | \n", "H3-3A | \n", "3020 | \n", "ENSG00000163041 | \n", "RI | \n", "NaN | \n", "COD | \n", "NaN | \n", "19412883 | \n", "
| 109 | \n", "H3 | \n", "H3.3 | \n", "H3-3B | \n", "3021 | \n", "ENSG00000132475 | \n", "RI | \n", "NaN | \n", "COD | \n", "NaN | \n", "19412883 | \n", "
| 110 | \n", "H3 | \n", "TS H3.4 | \n", "H3-4 | \n", "8290 | \n", "ENSG00000168148 | \n", "RI | \n", "TS | \n", "COD | \n", "NaN | \n", "8986613 | \n", "
| 111 | \n", "H3 | \n", "H3.5 | \n", "H3-5 | \n", "440093 | \n", "ENSG00000188375 | \n", "RI | \n", "TS | \n", "COD | \n", "NaN | \n", "21274551 | \n", "
| 112 | \n", "H3 | \n", "cenH3 | \n", "CENPA | \n", "1058 | \n", "ENSG00000115163 | \n", "RI | \n", "NaN | \n", "COD | \n", "NaN | \n", "23324462 | \n", "
| 113 | \n", "H3 | \n", "NaN | \n", "H3P26 | \n", "10338 | \n", "ENSG00000224447 | \n", "NaN | \n", "NaN | \n", "PS | \n", "NaN | \n", "NaN | \n", "
| 114 | \n", "H3 | \n", "NaN | \n", "H3P4 | \n", "106479023 | \n", "ENSG00000213244 | \n", "NaN | \n", "NaN | \n", "PS | \n", "NaN | \n", "NaN | \n", "
| 115 | \n", "H3 | \n", "NaN | \n", "H3P37 | \n", "664611 | \n", "ENSG00000270433 | \n", "NaN | \n", "NaN | \n", "PS | \n", "NaN | \n", "NaN | \n", "
| 116 | \n", "H3 | \n", "NaN | \n", "H3P38 | \n", "654505 | \n", "ENSG00000259389 | \n", "NaN | \n", "NaN | \n", "PS | \n", "NaN | \n", "NaN | \n", "
| 117 | \n", "H4 | \n", "canonical H4 | \n", "H4C1 | \n", "8359 | \n", "ENSG00000278637 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 118 | \n", "H4 | \n", "canonical H4 | \n", "H4C2 | \n", "8366 | \n", "ENSG00000278705 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 119 | \n", "H4 | \n", "canonical H4 | \n", "H4C3 | \n", "8364 | \n", "ENSG00000197061 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 120 | \n", "H4 | \n", "canonical H4 | \n", "H4C4 | \n", "8360 | \n", "ENSG00000277157 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 121 | \n", "H4 | \n", "canonical H4 | \n", "H4C5 | \n", "8367 | \n", "ENSG00000276966 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 122 | \n", "H4 | \n", "canonical H4 | \n", "H4C6 | \n", "8361 | \n", "ENSG00000274618 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 123 | \n", "H4 | \n", "canonical H4 | \n", "H4C7 | \n", "8369 | \n", "ENSG00000275663 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 124 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 125 | \n", "H4 | \n", "canonical H4 | \n", "H4C9 | \n", "8294 | \n", "ENSG00000276180 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 126 | \n", "H4 | \n", "NaN | \n", "H4C10P | \n", "10337 | \n", "ENSG00000217862 | \n", "NaN | \n", "NaN | \n", "PS | \n", "NaN | \n", "NaN | \n", "
| 127 | \n", "H4 | \n", "canonical H4 | \n", "H4C11 | \n", "8363 | \n", "ENSG00000197238 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 128 | \n", "H4 | \n", "canonical H4 | \n", "H4C12 | \n", "8362 | \n", "ENSG00000273542 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 129 | \n", "H4 | \n", "canonical H4 | \n", "H4C13 | \n", "8368 | \n", "ENSG00000275126 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 130 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 131 | \n", "H4 | \n", "canonical H4 | \n", "H4C15 | \n", "554313 | \n", "ENSG00000270276 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
| 132 | \n", "H4 | \n", "canonical H4 | \n", "H4-16 | \n", "121504 | \n", "ENSG00000197837 | \n", "RD | \n", "NaN | \n", "COD | \n", "canonical | \n", "12408966 | \n", "
133 rows × 10 columns
\n", "| \n", " | Histone type | \n", "Histone variant | \n", "HGNC symbol | \n", "NCBI gene ID | \n", "Ensembl gene ID | \n", "
|---|---|---|---|---|---|
| 0 | \n", "H1 | \n", "H1.0 | \n", "H1-0 | \n", "3005 | \n", "ENSG00000189060 | \n", "
| 1 | \n", "H1 | \n", "H1.1 | \n", "H1-1 | \n", "3024 | \n", "ENSG00000124610 | \n", "
| 2 | \n", "H1 | \n", "H1.2 | \n", "H1-2 | \n", "3006 | \n", "ENSG00000187837 | \n", "
| 3 | \n", "H1 | \n", "H1.3 | \n", "H1-3 | \n", "3007 | \n", "ENSG00000124575 | \n", "
| 4 | \n", "H1 | \n", "H1.4 | \n", "H1-4 | \n", "3008 | \n", "ENSG00000168298 | \n", "
| 5 | \n", "H1 | \n", "H1.5 | \n", "H1-5 | \n", "3009 | \n", "ENSG00000184357 | \n", "
| 6 | \n", "H1 | \n", "TS H1.6 | \n", "H1-6 | \n", "3010 | \n", "ENSG00000187475 | \n", "
| 7 | \n", "H1 | \n", "TS H1.7 | \n", "H1-7 | \n", "341567 | \n", "ENSG00000187166 | \n", "
| 8 | \n", "H1 | \n", "OO H1.8 | \n", "H1-8 | \n", "132243 | \n", "ENSG00000178804 | \n", "
| 9 | \n", "H1 | \n", "TS H1.9(?) | \n", "H1-9P | \n", "373861 | \n", "ENSG00000188662 | \n", "
| 10 | \n", "H1 | \n", "H1.10 | \n", "H1-10 | \n", "8971 | \n", "ENSG00000184897 | \n", "
| 12 | \n", "H2A | \n", "TS H2A.1 | \n", "H2AC1 | \n", "221613 | \n", "ENSG00000164508 | \n", "
| 15 | \n", "H2A | \n", "canonical H2A | \n", "H2AC4 | \n", "8335 | \n", "ENSG00000278463 | \n", "
| 17 | \n", "H2A | \n", "canonical H2A | \n", "H2AC6 | \n", "8334 | \n", "ENSG00000180573 | \n", "
| 18 | \n", "H2A | \n", "canonical H2A | \n", "H2AC7 | \n", "3013 | \n", "ENSG00000196866 | \n", "
| 19 | \n", "H2A | \n", "canonical H2A | \n", "H2AC8 | \n", "3012 | \n", "ENSG00000277075 | \n", "
| 22 | \n", "H2A | \n", "canonical H2A | \n", "H2AC11 | \n", "8969 | \n", "ENSG00000196787 | \n", "
| 23 | \n", "H2A | \n", "canonical H2A | \n", "H2AC12 | \n", "85235 | \n", "ENSG00000274997 | \n", "
| 24 | \n", "H2A | \n", "canonical H2A | \n", "H2AC13 | \n", "8329 | \n", "ENSG00000196747 | \n", "
| 25 | \n", "H2A | \n", "canonical H2A | \n", "H2AC14 | \n", "8331 | \n", "ENSG00000276368 | \n", "
| 26 | \n", "H2A | \n", "canonical H2A | \n", "H2AC15 | \n", "8330 | \n", "ENSG00000275221 | \n", "
| 27 | \n", "H2A | \n", "canonical H2A | \n", "H2AC16 | \n", "8332 | \n", "ENSG00000276903 | \n", "
| 28 | \n", "H2A | \n", "canonical H2A | \n", "H2AC17 | \n", "8336 | \n", "ENSG00000278677 | \n", "
| 29 | \n", "H2A | \n", "canonical H2A | \n", "H2AC18 | \n", "8337 | \n", "ENSG00000203812 | \n", "
| 30 | \n", "H2A | \n", "canonical H2A | \n", "H2AC19 | \n", "723790 | \n", "ENSG00000272196 | \n", "
| 31 | \n", "H2A | \n", "canonical H2A | \n", "H2AC20 | \n", "8338 | \n", "ENSG00000184260 | \n", "
| 32 | \n", "H2A | \n", "canonical H2A | \n", "H2AC21 | \n", "317772 | \n", "ENSG00000184270 | \n", "
| 33 | \n", "H2A | \n", "H2A.J(?) | \n", "H2AJ | \n", "55766 | \n", "ENSG00000246705 | \n", "
| 34 | \n", "H2A | \n", "canonical H2A | \n", "H2AW | \n", "92815 | \n", "ENSG00000181218 | \n", "
| 35 | \n", "H2A | \n", "H2A.X | \n", "H2AX | \n", "3014 | \n", "ENSG00000188486 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 97 | \n", "H3 | \n", "canonical H3.1 | \n", "H3C8 | \n", "8355 | \n", "ENSG00000273983 | \n", "
| 99 | \n", "H3 | \n", "canonical H3.1 | \n", "H3C10 | \n", "8357 | \n", "ENSG00000278828 | \n", "
| 100 | \n", "H3 | \n", "canonical H3.1 | \n", "H3C11 | \n", "8354 | \n", "ENSG00000275379 | \n", "
| 101 | \n", "H3 | \n", "canonical H3.1 | \n", "H3C12 | \n", "8356 | \n", "ENSG00000197153 | \n", "
| 102 | \n", "H3 | \n", "canonical H3.1 | \n", "H3C13 | \n", "653604 | \n", "ENSG00000183598 | \n", "
| 103 | \n", "H3 | \n", "canonical H3.2 | \n", "H3C14 | \n", "126961 | \n", "ENSG00000203811 | \n", "
| 104 | \n", "H3 | \n", "canonical H3.1 | \n", "H3C15 | \n", "333932 | \n", "ENSG00000203852 | \n", "
| 105 | \n", "H3 | \n", "H3.Y.1 | \n", "H3Y1 | \n", "391769 | \n", "ENSG00000269466 | \n", "
| 106 | \n", "H3 | \n", "H3.Y.2 | \n", "H3Y2 | \n", "340096 | \n", "ENSG00000268799 | \n", "
| 107 | \n", "H3 | \n", "canonical H3(?) | \n", "H3-2 | \n", "440686 | \n", "ENSG00000273213 | \n", "
| 108 | \n", "H3 | \n", "H3.3 | \n", "H3-3A | \n", "3020 | \n", "ENSG00000163041 | \n", "
| 109 | \n", "H3 | \n", "H3.3 | \n", "H3-3B | \n", "3021 | \n", "ENSG00000132475 | \n", "
| 110 | \n", "H3 | \n", "TS H3.4 | \n", "H3-4 | \n", "8290 | \n", "ENSG00000168148 | \n", "
| 111 | \n", "H3 | \n", "H3.5 | \n", "H3-5 | \n", "440093 | \n", "ENSG00000188375 | \n", "
| 112 | \n", "H3 | \n", "cenH3 | \n", "CENPA | \n", "1058 | \n", "ENSG00000115163 | \n", "
| 117 | \n", "H4 | \n", "canonical H4 | \n", "H4C1 | \n", "8359 | \n", "ENSG00000278637 | \n", "
| 118 | \n", "H4 | \n", "canonical H4 | \n", "H4C2 | \n", "8366 | \n", "ENSG00000278705 | \n", "
| 119 | \n", "H4 | \n", "canonical H4 | \n", "H4C3 | \n", "8364 | \n", "ENSG00000197061 | \n", "
| 120 | \n", "H4 | \n", "canonical H4 | \n", "H4C4 | \n", "8360 | \n", "ENSG00000277157 | \n", "
| 121 | \n", "H4 | \n", "canonical H4 | \n", "H4C5 | \n", "8367 | \n", "ENSG00000276966 | \n", "
| 122 | \n", "H4 | \n", "canonical H4 | \n", "H4C6 | \n", "8361 | \n", "ENSG00000274618 | \n", "
| 123 | \n", "H4 | \n", "canonical H4 | \n", "H4C7 | \n", "8369 | \n", "ENSG00000275663 | \n", "
| 124 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "
| 125 | \n", "H4 | \n", "canonical H4 | \n", "H4C9 | \n", "8294 | \n", "ENSG00000276180 | \n", "
| 127 | \n", "H4 | \n", "canonical H4 | \n", "H4C11 | \n", "8363 | \n", "ENSG00000197238 | \n", "
| 128 | \n", "H4 | \n", "canonical H4 | \n", "H4C12 | \n", "8362 | \n", "ENSG00000273542 | \n", "
| 129 | \n", "H4 | \n", "canonical H4 | \n", "H4C13 | \n", "8368 | \n", "ENSG00000275126 | \n", "
| 130 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "
| 131 | \n", "H4 | \n", "canonical H4 | \n", "H4C15 | \n", "554313 | \n", "ENSG00000270276 | \n", "
| 132 | \n", "H4 | \n", "canonical H4 | \n", "H4-16 | \n", "121504 | \n", "ENSG00000197837 | \n", "
96 rows × 5 columns
\n", "| \n", " | Histone type | \n", "Histone variant | \n", "HGNC symbol | \n", "NCBI gene ID | \n", "Ensembl gene ID | \n", "Transcript stable ID | \n", "Protein stable ID | \n", "RefSeq mRNA ID | \n", "RefSeq peptide ID | \n", "Transcript type | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "H1 | \n", "H1.0 | \n", "H1-0 | \n", "3005 | \n", "ENSG00000189060 | \n", "ENST00000340857 | \n", "ENSP00000344504 | \n", "NM_005318 | \n", "NP_005309 | \n", "protein_coding | \n", "
| 1 | \n", "H1 | \n", "H1.1 | \n", "H1-1 | \n", "3024 | \n", "ENSG00000124610 | \n", "ENST00000244573 | \n", "ENSP00000244573 | \n", "NM_005325 | \n", "NP_005316 | \n", "protein_coding | \n", "
| 2 | \n", "H1 | \n", "H1.2 | \n", "H1-2 | \n", "3006 | \n", "ENSG00000187837 | \n", "ENST00000343677 | \n", "ENSP00000339566 | \n", "NM_005319 | \n", "NP_005310 | \n", "protein_coding | \n", "
| 3 | \n", "H1 | \n", "H1.3 | \n", "H1-3 | \n", "3007 | \n", "ENSG00000124575 | \n", "ENST00000244534 | \n", "ENSP00000244534 | \n", "NM_005320 | \n", "NP_005311 | \n", "protein_coding | \n", "
| 4 | \n", "H1 | \n", "H1.4 | \n", "H1-4 | \n", "3008 | \n", "ENSG00000168298 | \n", "ENST00000304218 | \n", "ENSP00000307705 | \n", "NM_005321 | \n", "NP_005312 | \n", "protein_coding | \n", "
| 5 | \n", "H1 | \n", "H1.5 | \n", "H1-5 | \n", "3009 | \n", "ENSG00000184357 | \n", "ENST00000331442 | \n", "ENSP00000330074 | \n", "NM_005322 | \n", "NP_005313 | \n", "protein_coding | \n", "
| 6 | \n", "H1 | \n", "TS H1.6 | \n", "H1-6 | \n", "3010 | \n", "ENSG00000187475 | \n", "ENST00000338379 | \n", "ENSP00000341214 | \n", "NM_005323 | \n", "NP_005314 | \n", "protein_coding | \n", "
| 7 | \n", "H1 | \n", "TS H1.7 | \n", "H1-7 | \n", "341567 | \n", "ENSG00000187166 | \n", "ENST00000335017 | \n", "ENSP00000334805 | \n", "NM_181788 | \n", "NP_861453 | \n", "protein_coding | \n", "
| 8 | \n", "H1 | \n", "OO H1.8 | \n", "H1-8 | \n", "132243 | \n", "ENSG00000178804 | \n", "ENST00000324382 | \n", "ENSP00000319799 | \n", "NM_153833 | \n", "NP_722575 | \n", "protein_coding | \n", "
| 9 | \n", "H1 | \n", "OO H1.8 | \n", "H1-8 | \n", "132243 | \n", "ENSG00000178804 | \n", "ENST00000503977 | \n", "ENSP00000422964 | \n", "NM_001308262 | \n", "NP_001295191 | \n", "protein_coding | \n", "
| 10 | \n", "H1 | \n", "TS H1.9(?) | \n", "H1-9P | \n", "373861 | \n", "ENSG00000188662 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 11 | \n", "H1 | \n", "H1.10 | \n", "H1-10 | \n", "8971 | \n", "ENSG00000184897 | \n", "ENST00000333762 | \n", "ENSP00000329662 | \n", "NM_006026 | \n", "NP_006017 | \n", "protein_coding | \n", "
| 12 | \n", "H2A | \n", "TS H2A.1 | \n", "H2AC1 | \n", "221613 | \n", "ENSG00000164508 | \n", "ENST00000297012 | \n", "ENSP00000297012 | \n", "NM_170745 | \n", "NP_734466 | \n", "protein_coding | \n", "
| 13 | \n", "H2A | \n", "canonical H2A | \n", "H2AC4 | \n", "8335 | \n", "ENSG00000278463 | \n", "ENST00000615868 | \n", "ENSP00000483842 | \n", "NM_003513 | \n", "NP_003504 | \n", "protein_coding | \n", "
| 14 | \n", "H2A | \n", "canonical H2A | \n", "H2AC6 | \n", "8334 | \n", "ENSG00000180573 | \n", "ENST00000314088 | \n", "ENSP00000321389 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "
| 15 | \n", "H2A | \n", "canonical H2A | \n", "H2AC6 | \n", "8334 | \n", "ENSG00000180573 | \n", "ENST00000602637 | \n", "ENSP00000473534 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "
| 16 | \n", "H2A | \n", "canonical H2A | \n", "H2AC6 | \n", "8334 | \n", "ENSG00000180573 | \n", "ENST00000377791 | \n", "ENSP00000367022 | \n", "NM_003512 | \n", "NP_003503 | \n", "protein_coding | \n", "
| 17 | \n", "H2A | \n", "canonical H2A | \n", "H2AC7 | \n", "3013 | \n", "ENSG00000196866 | \n", "ENST00000341023 | \n", "ENSP00000341094 | \n", "NM_021065 | \n", "NP_066409 | \n", "protein_coding | \n", "
| 18 | \n", "H2A | \n", "canonical H2A | \n", "H2AC8 | \n", "3012 | \n", "ENSG00000277075 | \n", "ENST00000303910 | \n", "ENSP00000303373 | \n", "NM_021052 | \n", "NP_066390 | \n", "protein_coding | \n", "
| 19 | \n", "H2A | \n", "canonical H2A | \n", "H2AC11 | \n", "8969 | \n", "ENSG00000196787 | \n", "ENST00000359193 | \n", "ENSP00000352119 | \n", "NM_021064 | \n", "NP_066408 | \n", "protein_coding | \n", "
| 20 | \n", "H2A | \n", "canonical H2A | \n", "H2AC12 | \n", "85235 | \n", "ENSG00000274997 | \n", "ENST00000377459 | \n", "ENSP00000366679 | \n", "NM_080596 | \n", "NP_542163 | \n", "protein_coding | \n", "
| 21 | \n", "H2A | \n", "canonical H2A | \n", "H2AC13 | \n", "8329 | \n", "ENSG00000196747 | \n", "ENST00000358739 | \n", "ENSP00000351589 | \n", "NM_003509 | \n", "NP_003500 | \n", "protein_coding | \n", "
| 22 | \n", "H2A | \n", "canonical H2A | \n", "H2AC14 | \n", "8331 | \n", "ENSG00000276368 | \n", "ENST00000333151 | \n", "ENSP00000328484 | \n", "NM_021066 | \n", "NP_066544 | \n", "protein_coding | \n", "
| 23 | \n", "H2A | \n", "canonical H2A | \n", "H2AC15 | \n", "8330 | \n", "ENSG00000275221 | \n", "ENST00000618958 | \n", "ENSP00000482431 | \n", "NM_003510 | \n", "NP_003501 | \n", "protein_coding | \n", "
| 24 | \n", "H2A | \n", "canonical H2A | \n", "H2AC16 | \n", "8332 | \n", "ENSG00000276903 | \n", "ENST00000613174 | \n", "ENSP00000482538 | \n", "NM_003511 | \n", "NP_003502 | \n", "protein_coding | \n", "
| 25 | \n", "H2A | \n", "canonical H2A | \n", "H2AC17 | \n", "8336 | \n", "ENSG00000278677 | \n", "ENST00000359611 | \n", "ENSP00000352627 | \n", "NM_003514 | \n", "NP_003505 | \n", "protein_coding | \n", "
| 26 | \n", "H2A | \n", "canonical H2A | \n", "H2AC18 | \n", "8337 | \n", "ENSG00000203812 | \n", "ENST00000369159 | \n", "ENSP00000358155 | \n", "NM_003516 | \n", "NP_003507 | \n", "protein_coding | \n", "
| 27 | \n", "H2A | \n", "canonical H2A | \n", "H2AC19 | \n", "723790 | \n", "ENSG00000272196 | \n", "ENST00000607355 | \n", "ENSP00000475814 | \n", "NM_001040874 | \n", "NP_001035807 | \n", "protein_coding | \n", "
| 28 | \n", "H2A | \n", "canonical H2A | \n", "H2AC20 | \n", "8338 | \n", "ENSG00000184260 | \n", "ENST00000331380 | \n", "ENSP00000332194 | \n", "NM_003517 | \n", "NP_003508 | \n", "protein_coding | \n", "
| 29 | \n", "H2A | \n", "canonical H2A | \n", "H2AC21 | \n", "317772 | \n", "ENSG00000184270 | \n", "ENST00000331128 | \n", "ENSP00000332790 | \n", "NM_175065 | \n", "NP_778235 | \n", "protein_coding | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 132 | \n", "H3 | \n", "H3.3 | \n", "H3-3B | \n", "3021 | \n", "ENSG00000132475 | \n", "ENST00000587171 | \n", "ENSP00000468484 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "
| 133 | \n", "H3 | \n", "TS H3.4 | \n", "H3-4 | \n", "8290 | \n", "ENSG00000168148 | \n", "ENST00000366696 | \n", "ENSP00000355657 | \n", "NM_003493 | \n", "NP_003484 | \n", "protein_coding | \n", "
| 134 | \n", "H3 | \n", "H3.5 | \n", "H3-5 | \n", "440093 | \n", "ENSG00000188375 | \n", "ENST00000340398 | \n", "ENSP00000339835 | \n", "NM_001013699 | \n", "NP_001013721 | \n", "protein_coding | \n", "
| 135 | \n", "H3 | \n", "cenH3 | \n", "CENPA | \n", "1058 | \n", "ENSG00000115163 | \n", "ENST00000335756 | \n", "ENSP00000336868 | \n", "NM_001809 | \n", "NP_001800 | \n", "protein_coding | \n", "
| 136 | \n", "H3 | \n", "cenH3 | \n", "CENPA | \n", "1058 | \n", "ENSG00000115163 | \n", "ENST00000233505 | \n", "ENSP00000233505 | \n", "NM_001042426 | \n", "NP_001035891 | \n", "protein_coding | \n", "
| 137 | \n", "H3 | \n", "cenH3 | \n", "CENPA | \n", "1058 | \n", "ENSG00000115163 | \n", "ENST00000419525 | \n", "ENSP00000404963 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "
| 138 | \n", "H4 | \n", "canonical H4 | \n", "H4C1 | \n", "8359 | \n", "ENSG00000278637 | \n", "ENST00000617569 | \n", "ENSP00000479106 | \n", "NM_003538 | \n", "NP_003529 | \n", "protein_coding | \n", "
| 139 | \n", "H4 | \n", "canonical H4 | \n", "H4C2 | \n", "8366 | \n", "ENSG00000278705 | \n", "ENST00000377745 | \n", "ENSP00000366974 | \n", "NM_003544 | \n", "NP_003535 | \n", "protein_coding | \n", "
| 140 | \n", "H4 | \n", "canonical H4 | \n", "H4C3 | \n", "8364 | \n", "ENSG00000197061 | \n", "ENST00000377803 | \n", "ENSP00000367034 | \n", "NM_003542 | \n", "NP_003533 | \n", "protein_coding | \n", "
| 141 | \n", "H4 | \n", "canonical H4 | \n", "H4C4 | \n", "8360 | \n", "ENSG00000277157 | \n", "ENST00000614247 | \n", "ENSP00000479461 | \n", "NM_003539 | \n", "NP_003530 | \n", "protein_coding | \n", "
| 142 | \n", "H4 | \n", "canonical H4 | \n", "H4C5 | \n", "8367 | \n", "ENSG00000276966 | \n", "ENST00000615164 | \n", "ENSP00000484789 | \n", "NM_003545 | \n", "NP_003536 | \n", "protein_coding | \n", "
| 143 | \n", "H4 | \n", "canonical H4 | \n", "H4C6 | \n", "8361 | \n", "ENSG00000274618 | \n", "ENST00000244537 | \n", "ENSP00000244537 | \n", "NM_003540 | \n", "NP_003531 | \n", "protein_coding | \n", "
| 144 | \n", "H4 | \n", "canonical H4 | \n", "H4C7 | \n", "8369 | \n", "ENSG00000275663 | \n", "ENST00000611444 | \n", "ENSP00000477870 | \n", "NM_003547 | \n", "NP_003538 | \n", "protein_coding | \n", "
| 145 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "ENST00000634956 | \n", "ENSP00000489567 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "
| 146 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "ENST00000634560 | \n", "ENSP00000489319 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "
| 147 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "ENST00000377727 | \n", "ENSP00000366956 | \n", "NM_003543 | \n", "NP_003534 | \n", "protein_coding | \n", "
| 148 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "ENST00000635491 | \n", "ENSP00000489236 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "
| 149 | \n", "H4 | \n", "canonical H4 | \n", "H4C9 | \n", "8294 | \n", "ENSG00000276180 | \n", "ENST00000615353 | \n", "ENSP00000481486 | \n", "NM_003495 | \n", "NP_003486 | \n", "protein_coding | \n", "
| 150 | \n", "H4 | \n", "canonical H4 | \n", "H4C11 | \n", "8363 | \n", "ENSG00000197238 | \n", "ENST00000355057 | \n", "ENSP00000347168 | \n", "NM_021968 | \n", "NP_068803 | \n", "protein_coding | \n", "
| 151 | \n", "H4 | \n", "canonical H4 | \n", "H4C12 | \n", "8362 | \n", "ENSG00000273542 | \n", "ENST00000611927 | \n", "ENSP00000479794 | \n", "NM_003541 | \n", "NP_003532 | \n", "protein_coding | \n", "
| 152 | \n", "H4 | \n", "canonical H4 | \n", "H4C13 | \n", "8368 | \n", "ENSG00000275126 | \n", "ENST00000618305 | \n", "ENSP00000480960 | \n", "NM_003546 | \n", "NP_003537 | \n", "protein_coding | \n", "
| 153 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "ENST00000578186 | \n", "ENSP00000462667 | \n", "NM_003548 | \n", "NP_003539 | \n", "protein_coding | \n", "
| 154 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "ENST00000618193 | \n", "ENSP00000478786 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "
| 155 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "ENST00000614272 | \n", "ENSP00000478519 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "
| 156 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "ENST00000613412 | \n", "ENSP00000481343 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "
| 157 | \n", "H4 | \n", "canonical H4 | \n", "H4C15 | \n", "554313 | \n", "ENSG00000270276 | \n", "ENST00000621520 | \n", "ENSP00000481507 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "
| 158 | \n", "H4 | \n", "canonical H4 | \n", "H4C15 | \n", "554313 | \n", "ENSG00000270276 | \n", "ENST00000612061 | \n", "ENSP00000482412 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "
| 159 | \n", "H4 | \n", "canonical H4 | \n", "H4C15 | \n", "554313 | \n", "ENSG00000270276 | \n", "ENST00000579512 | \n", "ENSP00000462355 | \n", "NM_001034077 | \n", "NP_001029249 | \n", "protein_coding | \n", "
| 160 | \n", "H4 | \n", "canonical H4 | \n", "H4-16 | \n", "121504 | \n", "ENSG00000197837 | \n", "ENST00000358064 | \n", "ENSP00000350767 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "
| 161 | \n", "H4 | \n", "canonical H4 | \n", "H4-16 | \n", "121504 | \n", "ENSG00000197837 | \n", "ENST00000539745 | \n", "ENSP00000443017 | \n", "NM_175054 | \n", "NP_778224 | \n", "protein_coding | \n", "
162 rows × 10 columns
\n", "| \n", " | Histone type | \n", "Histone variant | \n", "HGNC symbol | \n", "NCBI gene ID | \n", "Ensembl gene ID | \n", "Transcript stable ID | \n", "Protein stable ID | \n", "RefSeq mRNA ID | \n", "RefSeq peptide ID | \n", "Transcript type | \n", "Protein sequence | \n", "Protein length | \n", "References | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "H1 | \n", "H1.0 | \n", "H1-0 | \n", "3005 | \n", "ENSG00000189060 | \n", "ENST00000340857 | \n", "ENSP00000344504 | \n", "NM_005318 | \n", "NP_005309 | \n", "protein_coding | \n", "b'MTENSTSAPAAKPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAG... | \n", "194 | \n", "26689747 | \n", "
| 1 | \n", "H1 | \n", "H1.1 | \n", "H1-1 | \n", "3024 | \n", "ENSG00000124610 | \n", "ENST00000244573 | \n", "ENSP00000244573 | \n", "NM_005325 | \n", "NP_005316 | \n", "protein_coding | \n", "b'MSETVPPAPAASAAPEKPLAGKKAKKPAKAAAASKKKPAGPSVS... | \n", "215 | \n", "26689747 | \n", "
| 2 | \n", "H1 | \n", "H1.2 | \n", "H1-2 | \n", "3006 | \n", "ENSG00000187837 | \n", "ENST00000343677 | \n", "ENSP00000339566 | \n", "NM_005319 | \n", "NP_005310 | \n", "protein_coding | \n", "b'MSETAPAAPAAAPPAEKAPVKKKAAKKAGGTPRKASGPPVSELI... | \n", "213 | \n", "26689747 | \n", "
| 3 | \n", "H1 | \n", "H1.3 | \n", "H1-3 | \n", "3007 | \n", "ENSG00000124575 | \n", "ENST00000244534 | \n", "ENSP00000244534 | \n", "NM_005320 | \n", "NP_005311 | \n", "protein_coding | \n", "b'MSETAPLAPTIPAPAEKTPVKKKAKKAGATAGKRKASGPPVSEL... | \n", "221 | \n", "26689747 | \n", "
| 4 | \n", "H1 | \n", "H1.4 | \n", "H1-4 | \n", "3008 | \n", "ENSG00000168298 | \n", "ENST00000304218 | \n", "ENSP00000307705 | \n", "NM_005321 | \n", "NP_005312 | \n", "protein_coding | \n", "b'MSETAPAAPAAPAPAEKTPVKKKARKSAGAAKRKASGPPVSELI... | \n", "219 | \n", "26689747 | \n", "
| 5 | \n", "H1 | \n", "H1.5 | \n", "H1-5 | \n", "3009 | \n", "ENSG00000184357 | \n", "ENST00000331442 | \n", "ENSP00000330074 | \n", "NM_005322 | \n", "NP_005313 | \n", "protein_coding | \n", "b'MSETAPAETATPAPVEKSPAKKKATKKAAGAGAAKRKATGPPVS... | \n", "226 | \n", "26689747 | \n", "
| 6 | \n", "H1 | \n", "TS H1.6 | \n", "H1-6 | \n", "3010 | \n", "ENSG00000187475 | \n", "ENST00000338379 | \n", "ENSP00000341214 | \n", "NM_005323 | \n", "NP_005314 | \n", "protein_coding | \n", "b'MSETVPAASASAGVAAMEKLPTKKRGRKPAGLISASRKVPNLSV... | \n", "207 | \n", "26689747 | \n", "
| 7 | \n", "H1 | \n", "TS H1.7 | \n", "H1-7 | \n", "341567 | \n", "ENSG00000187166 | \n", "ENST00000335017 | \n", "ENSP00000334805 | \n", "NM_181788 | \n", "NP_861453 | \n", "protein_coding | \n", "b'MEQALTGEAQSRWPRRGGSGAMAEAPGPSGESRGHSATQLPAEK... | \n", "255 | \n", "26689747 | \n", "
| 8 | \n", "H1 | \n", "OO H1.8 | \n", "H1-8 | \n", "132243 | \n", "ENSG00000178804 | \n", "ENST00000324382 | \n", "ENSP00000319799 | \n", "NM_153833 | \n", "NP_722575 | \n", "protein_coding | \n", "b'MAPGSVTSDISPSSTSTAGSSRSPESEKPGPSHGGVPPGGPSHS... | \n", "346 | \n", "26689747 | \n", "
| 9 | \n", "H1 | \n", "OO H1.8 | \n", "H1-8 | \n", "132243 | \n", "ENSG00000178804 | \n", "ENST00000503977 | \n", "ENSP00000422964 | \n", "NM_001308262 | \n", "NP_001295191 | \n", "protein_coding | \n", "b'MAPATAPRRAGEAKGKGPKKPSEAKEDPPNVGKVKKAAKRPAKV... | \n", "207 | \n", "26689747 | \n", "
| 10 | \n", "H1 | \n", "TS H1.9(?) | \n", "H1-9P | \n", "373861 | \n", "ENSG00000188662 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "b'{\"error\":\"ID \\'nan\\' not found\"}' | \n", "30 | \n", "12920187 26689747 17852044 | \n", "
| 11 | \n", "H1 | \n", "H1.10 | \n", "H1-10 | \n", "8971 | \n", "ENSG00000184897 | \n", "ENST00000333762 | \n", "ENSP00000329662 | \n", "NM_006026 | \n", "NP_006017 | \n", "protein_coding | \n", "b'MSVELEEALPVTTAEGMAKKVTKAGGSAALSPSKKRKNSKKKNQ... | \n", "213 | \n", "26689747 | \n", "
| 12 | \n", "H2A | \n", "TS H2A.1 | \n", "H2AC1 | \n", "221613 | \n", "ENSG00000164508 | \n", "ENST00000297012 | \n", "ENSP00000297012 | \n", "NM_170745 | \n", "NP_734466 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKSKSRSSRAGLQFPVGRIHRLLRKGNYAERI... | \n", "131 | \n", "2011515 7068607 24506885 | \n", "
| 13 | \n", "H2A | \n", "canonical H2A | \n", "H2AC4 | \n", "8335 | \n", "ENSG00000278463 | \n", "ENST00000615868 | \n", "ENSP00000483842 | \n", "NM_003513 | \n", "NP_003504 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 14 | \n", "H2A | \n", "canonical H2A | \n", "H2AC6 | \n", "8334 | \n", "ENSG00000180573 | \n", "ENST00000314088 | \n", "ENSP00000321389 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 15 | \n", "H2A | \n", "canonical H2A | \n", "H2AC6 | \n", "8334 | \n", "ENSG00000180573 | \n", "ENST00000602637 | \n", "ENSP00000473534 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 16 | \n", "H2A | \n", "canonical H2A | \n", "H2AC6 | \n", "8334 | \n", "ENSG00000180573 | \n", "ENST00000377791 | \n", "ENSP00000367022 | \n", "NM_003512 | \n", "NP_003503 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 17 | \n", "H2A | \n", "canonical H2A | \n", "H2AC7 | \n", "3013 | \n", "ENSG00000196866 | \n", "ENST00000341023 | \n", "ENSP00000341094 | \n", "NM_021065 | \n", "NP_066409 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 18 | \n", "H2A | \n", "canonical H2A | \n", "H2AC8 | \n", "3012 | \n", "ENSG00000277075 | \n", "ENST00000303910 | \n", "ENSP00000303373 | \n", "NM_021052 | \n", "NP_066390 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 19 | \n", "H2A | \n", "canonical H2A | \n", "H2AC11 | \n", "8969 | \n", "ENSG00000196787 | \n", "ENST00000359193 | \n", "ENSP00000352119 | \n", "NM_021064 | \n", "NP_066408 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 20 | \n", "H2A | \n", "canonical H2A | \n", "H2AC12 | \n", "85235 | \n", "ENSG00000274997 | \n", "ENST00000377459 | \n", "ENSP00000366679 | \n", "NM_080596 | \n", "NP_542163 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "128 | \n", "12408966 25731851 | \n", "
| 21 | \n", "H2A | \n", "canonical H2A | \n", "H2AC13 | \n", "8329 | \n", "ENSG00000196747 | \n", "ENST00000358739 | \n", "ENSP00000351589 | \n", "NM_003509 | \n", "NP_003500 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 22 | \n", "H2A | \n", "canonical H2A | \n", "H2AC14 | \n", "8331 | \n", "ENSG00000276368 | \n", "ENST00000333151 | \n", "ENSP00000328484 | \n", "NM_021066 | \n", "NP_066544 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "128 | \n", "12408966 25731851 | \n", "
| 23 | \n", "H2A | \n", "canonical H2A | \n", "H2AC15 | \n", "8330 | \n", "ENSG00000275221 | \n", "ENST00000618958 | \n", "ENSP00000482431 | \n", "NM_003510 | \n", "NP_003501 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 24 | \n", "H2A | \n", "canonical H2A | \n", "H2AC16 | \n", "8332 | \n", "ENSG00000276903 | \n", "ENST00000613174 | \n", "ENSP00000482538 | \n", "NM_003511 | \n", "NP_003502 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 25 | \n", "H2A | \n", "canonical H2A | \n", "H2AC17 | \n", "8336 | \n", "ENSG00000278677 | \n", "ENST00000359611 | \n", "ENSP00000352627 | \n", "NM_003514 | \n", "NP_003505 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 26 | \n", "H2A | \n", "canonical H2A | \n", "H2AC18 | \n", "8337 | \n", "ENSG00000203812 | \n", "ENST00000369159 | \n", "ENSP00000358155 | \n", "NM_003516 | \n", "NP_003507 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 27 | \n", "H2A | \n", "canonical H2A | \n", "H2AC19 | \n", "723790 | \n", "ENSG00000272196 | \n", "ENST00000607355 | \n", "ENSP00000475814 | \n", "NM_001040874 | \n", "NP_001035807 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 28 | \n", "H2A | \n", "canonical H2A | \n", "H2AC20 | \n", "8338 | \n", "ENSG00000184260 | \n", "ENST00000331380 | \n", "ENSP00000332194 | \n", "NM_003517 | \n", "NP_003508 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "129 | \n", "12408966 25731851 | \n", "
| 29 | \n", "H2A | \n", "canonical H2A | \n", "H2AC21 | \n", "317772 | \n", "ENSG00000184270 | \n", "ENST00000331128 | \n", "ENSP00000332790 | \n", "NM_175065 | \n", "NP_778235 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 132 | \n", "H3 | \n", "H3.3 | \n", "H3-3B | \n", "3021 | \n", "ENSG00000132475 | \n", "ENST00000587171 | \n", "ENSP00000468484 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRP... | \n", "151 | \n", "19412883 | \n", "
| 133 | \n", "H3 | \n", "TS H3.4 | \n", "H3-4 | \n", "8290 | \n", "ENSG00000168148 | \n", "ENST00000366696 | \n", "ENSP00000355657 | \n", "NM_003493 | \n", "NP_003484 | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKVARKSAPATGGVKKPHRYRP... | \n", "136 | \n", "8986613 | \n", "
| 134 | \n", "H3 | \n", "H3.5 | \n", "H3-5 | \n", "440093 | \n", "ENSG00000188375 | \n", "ENST00000340398 | \n", "ENSP00000339835 | \n", "NM_001013699 | \n", "NP_001013721 | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSTPSTCGVKPHRYRPG... | \n", "135 | \n", "21274551 | \n", "
| 135 | \n", "H3 | \n", "cenH3 | \n", "CENPA | \n", "1058 | \n", "ENSG00000115163 | \n", "ENST00000335756 | \n", "ENSP00000336868 | \n", "NM_001809 | \n", "NP_001800 | \n", "protein_coding | \n", "b'MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRR... | \n", "140 | \n", "23324462 | \n", "
| 136 | \n", "H3 | \n", "cenH3 | \n", "CENPA | \n", "1058 | \n", "ENSG00000115163 | \n", "ENST00000233505 | \n", "ENSP00000233505 | \n", "NM_001042426 | \n", "NP_001035891 | \n", "protein_coding | \n", "b'MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRR... | \n", "114 | \n", "23324462 | \n", "
| 137 | \n", "H3 | \n", "cenH3 | \n", "CENPA | \n", "1058 | \n", "ENSG00000115163 | \n", "ENST00000419525 | \n", "ENSP00000404963 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "b'MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRR... | \n", "86 | \n", "23324462 | \n", "
| 138 | \n", "H4 | \n", "canonical H4 | \n", "H4C1 | \n", "8359 | \n", "ENSG00000278637 | \n", "ENST00000617569 | \n", "ENSP00000479106 | \n", "NM_003538 | \n", "NP_003529 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 139 | \n", "H4 | \n", "canonical H4 | \n", "H4C2 | \n", "8366 | \n", "ENSG00000278705 | \n", "ENST00000377745 | \n", "ENSP00000366974 | \n", "NM_003544 | \n", "NP_003535 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 140 | \n", "H4 | \n", "canonical H4 | \n", "H4C3 | \n", "8364 | \n", "ENSG00000197061 | \n", "ENST00000377803 | \n", "ENSP00000367034 | \n", "NM_003542 | \n", "NP_003533 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 141 | \n", "H4 | \n", "canonical H4 | \n", "H4C4 | \n", "8360 | \n", "ENSG00000277157 | \n", "ENST00000614247 | \n", "ENSP00000479461 | \n", "NM_003539 | \n", "NP_003530 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 142 | \n", "H4 | \n", "canonical H4 | \n", "H4C5 | \n", "8367 | \n", "ENSG00000276966 | \n", "ENST00000615164 | \n", "ENSP00000484789 | \n", "NM_003545 | \n", "NP_003536 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 143 | \n", "H4 | \n", "canonical H4 | \n", "H4C6 | \n", "8361 | \n", "ENSG00000274618 | \n", "ENST00000244537 | \n", "ENSP00000244537 | \n", "NM_003540 | \n", "NP_003531 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 144 | \n", "H4 | \n", "canonical H4 | \n", "H4C7 | \n", "8369 | \n", "ENSG00000275663 | \n", "ENST00000611444 | \n", "ENSP00000477870 | \n", "NM_003547 | \n", "NP_003538 | \n", "protein_coding | \n", "b'MSVRGKAGKGLGKGGAKCHRKVLSDNIQGITKCTIRRLARHGGV... | \n", "98 | \n", "12408966 | \n", "
| 145 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "ENST00000634956 | \n", "ENSP00000489567 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 146 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "ENST00000634560 | \n", "ENSP00000489319 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 147 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "ENST00000377727 | \n", "ENSP00000366956 | \n", "NM_003543 | \n", "NP_003534 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 148 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "ENST00000635491 | \n", "ENSP00000489236 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 149 | \n", "H4 | \n", "canonical H4 | \n", "H4C9 | \n", "8294 | \n", "ENSG00000276180 | \n", "ENST00000615353 | \n", "ENSP00000481486 | \n", "NM_003495 | \n", "NP_003486 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 150 | \n", "H4 | \n", "canonical H4 | \n", "H4C11 | \n", "8363 | \n", "ENSG00000197238 | \n", "ENST00000355057 | \n", "ENSP00000347168 | \n", "NM_021968 | \n", "NP_068803 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 151 | \n", "H4 | \n", "canonical H4 | \n", "H4C12 | \n", "8362 | \n", "ENSG00000273542 | \n", "ENST00000611927 | \n", "ENSP00000479794 | \n", "NM_003541 | \n", "NP_003532 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 152 | \n", "H4 | \n", "canonical H4 | \n", "H4C13 | \n", "8368 | \n", "ENSG00000275126 | \n", "ENST00000618305 | \n", "ENSP00000480960 | \n", "NM_003546 | \n", "NP_003537 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 153 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "ENST00000578186 | \n", "ENSP00000462667 | \n", "NM_003548 | \n", "NP_003539 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 154 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "ENST00000618193 | \n", "ENSP00000478786 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 155 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "ENST00000614272 | \n", "ENSP00000478519 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 156 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "ENST00000613412 | \n", "ENSP00000481343 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 157 | \n", "H4 | \n", "canonical H4 | \n", "H4C15 | \n", "554313 | \n", "ENSG00000270276 | \n", "ENST00000621520 | \n", "ENSP00000481507 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 158 | \n", "H4 | \n", "canonical H4 | \n", "H4C15 | \n", "554313 | \n", "ENSG00000270276 | \n", "ENST00000612061 | \n", "ENSP00000482412 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 159 | \n", "H4 | \n", "canonical H4 | \n", "H4C15 | \n", "554313 | \n", "ENSG00000270276 | \n", "ENST00000579512 | \n", "ENSP00000462355 | \n", "NM_001034077 | \n", "NP_001029249 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 160 | \n", "H4 | \n", "canonical H4 | \n", "H4-16 | \n", "121504 | \n", "ENSG00000197837 | \n", "ENST00000358064 | \n", "ENSP00000350767 | \n", "NaN | \n", "NaN | \n", "nonsense_mediated_decay | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 161 | \n", "H4 | \n", "canonical H4 | \n", "H4-16 | \n", "121504 | \n", "ENSG00000197837 | \n", "ENST00000539745 | \n", "ENSP00000443017 | \n", "NM_175054 | \n", "NP_778224 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
162 rows × 13 columns
\n", "| \n", " | Histone type | \n", "Histone variant | \n", "HGNC symbol | \n", "NCBI gene ID | \n", "Ensembl gene ID | \n", "Transcript stable ID | \n", "Protein stable ID | \n", "RefSeq mRNA ID | \n", "RefSeq peptide ID | \n", "Transcript type | \n", "Protein sequence | \n", "Protein length | \n", "References | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "H1 | \n", "H1.0 | \n", "H1-0 | \n", "3005 | \n", "ENSG00000189060 | \n", "ENST00000340857 | \n", "ENSP00000344504 | \n", "NM_005318 | \n", "NP_005309 | \n", "protein_coding | \n", "b'MTENSTSAPAAKPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAG... | \n", "194 | \n", "26689747 | \n", "
| 1 | \n", "H1 | \n", "H1.1 | \n", "H1-1 | \n", "3024 | \n", "ENSG00000124610 | \n", "ENST00000244573 | \n", "ENSP00000244573 | \n", "NM_005325 | \n", "NP_005316 | \n", "protein_coding | \n", "b'MSETVPPAPAASAAPEKPLAGKKAKKPAKAAAASKKKPAGPSVS... | \n", "215 | \n", "26689747 | \n", "
| 2 | \n", "H1 | \n", "H1.2 | \n", "H1-2 | \n", "3006 | \n", "ENSG00000187837 | \n", "ENST00000343677 | \n", "ENSP00000339566 | \n", "NM_005319 | \n", "NP_005310 | \n", "protein_coding | \n", "b'MSETAPAAPAAAPPAEKAPVKKKAAKKAGGTPRKASGPPVSELI... | \n", "213 | \n", "26689747 | \n", "
| 3 | \n", "H1 | \n", "H1.3 | \n", "H1-3 | \n", "3007 | \n", "ENSG00000124575 | \n", "ENST00000244534 | \n", "ENSP00000244534 | \n", "NM_005320 | \n", "NP_005311 | \n", "protein_coding | \n", "b'MSETAPLAPTIPAPAEKTPVKKKAKKAGATAGKRKASGPPVSEL... | \n", "221 | \n", "26689747 | \n", "
| 4 | \n", "H1 | \n", "H1.4 | \n", "H1-4 | \n", "3008 | \n", "ENSG00000168298 | \n", "ENST00000304218 | \n", "ENSP00000307705 | \n", "NM_005321 | \n", "NP_005312 | \n", "protein_coding | \n", "b'MSETAPAAPAAPAPAEKTPVKKKARKSAGAAKRKASGPPVSELI... | \n", "219 | \n", "26689747 | \n", "
| 5 | \n", "H1 | \n", "H1.5 | \n", "H1-5 | \n", "3009 | \n", "ENSG00000184357 | \n", "ENST00000331442 | \n", "ENSP00000330074 | \n", "NM_005322 | \n", "NP_005313 | \n", "protein_coding | \n", "b'MSETAPAETATPAPVEKSPAKKKATKKAAGAGAAKRKATGPPVS... | \n", "226 | \n", "26689747 | \n", "
| 6 | \n", "H1 | \n", "TS H1.6 | \n", "H1-6 | \n", "3010 | \n", "ENSG00000187475 | \n", "ENST00000338379 | \n", "ENSP00000341214 | \n", "NM_005323 | \n", "NP_005314 | \n", "protein_coding | \n", "b'MSETVPAASASAGVAAMEKLPTKKRGRKPAGLISASRKVPNLSV... | \n", "207 | \n", "26689747 | \n", "
| 7 | \n", "H1 | \n", "TS H1.7 | \n", "H1-7 | \n", "341567 | \n", "ENSG00000187166 | \n", "ENST00000335017 | \n", "ENSP00000334805 | \n", "NM_181788 | \n", "NP_861453 | \n", "protein_coding | \n", "b'MEQALTGEAQSRWPRRGGSGAMAEAPGPSGESRGHSATQLPAEK... | \n", "255 | \n", "26689747 | \n", "
| 8 | \n", "H1 | \n", "OO H1.8 | \n", "H1-8 | \n", "132243 | \n", "ENSG00000178804 | \n", "ENST00000324382 | \n", "ENSP00000319799 | \n", "NM_153833 | \n", "NP_722575 | \n", "protein_coding | \n", "b'MAPGSVTSDISPSSTSTAGSSRSPESEKPGPSHGGVPPGGPSHS... | \n", "346 | \n", "26689747 | \n", "
| 9 | \n", "H1 | \n", "OO H1.8 | \n", "H1-8 | \n", "132243 | \n", "ENSG00000178804 | \n", "ENST00000503977 | \n", "ENSP00000422964 | \n", "NM_001308262 | \n", "NP_001295191 | \n", "protein_coding | \n", "b'MAPATAPRRAGEAKGKGPKKPSEAKEDPPNVGKVKKAAKRPAKV... | \n", "207 | \n", "26689747 | \n", "
| 10 | \n", "H1 | \n", "TS H1.9(?) | \n", "H1-9P | \n", "373861 | \n", "ENSG00000188662 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "b'{\"error\":\"ID \\'nan\\' not found\"}' | \n", "30 | \n", "12920187 26689747 17852044 | \n", "
| 11 | \n", "H1 | \n", "H1.10 | \n", "H1-10 | \n", "8971 | \n", "ENSG00000184897 | \n", "ENST00000333762 | \n", "ENSP00000329662 | \n", "NM_006026 | \n", "NP_006017 | \n", "protein_coding | \n", "b'MSVELEEALPVTTAEGMAKKVTKAGGSAALSPSKKRKNSKKKNQ... | \n", "213 | \n", "26689747 | \n", "
| 12 | \n", "H2A | \n", "TS H2A.1 | \n", "H2AC1 | \n", "221613 | \n", "ENSG00000164508 | \n", "ENST00000297012 | \n", "ENSP00000297012 | \n", "NM_170745 | \n", "NP_734466 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKSKSRSSRAGLQFPVGRIHRLLRKGNYAERI... | \n", "131 | \n", "2011515 7068607 24506885 | \n", "
| 13 | \n", "H2A | \n", "canonical H2A | \n", "H2AC4 | \n", "8335 | \n", "ENSG00000278463 | \n", "ENST00000615868 | \n", "ENSP00000483842 | \n", "NM_003513 | \n", "NP_003504 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 16 | \n", "H2A | \n", "canonical H2A | \n", "H2AC6 | \n", "8334 | \n", "ENSG00000180573 | \n", "ENST00000377791 | \n", "ENSP00000367022 | \n", "NM_003512 | \n", "NP_003503 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 17 | \n", "H2A | \n", "canonical H2A | \n", "H2AC7 | \n", "3013 | \n", "ENSG00000196866 | \n", "ENST00000341023 | \n", "ENSP00000341094 | \n", "NM_021065 | \n", "NP_066409 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 18 | \n", "H2A | \n", "canonical H2A | \n", "H2AC8 | \n", "3012 | \n", "ENSG00000277075 | \n", "ENST00000303910 | \n", "ENSP00000303373 | \n", "NM_021052 | \n", "NP_066390 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 19 | \n", "H2A | \n", "canonical H2A | \n", "H2AC11 | \n", "8969 | \n", "ENSG00000196787 | \n", "ENST00000359193 | \n", "ENSP00000352119 | \n", "NM_021064 | \n", "NP_066408 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 20 | \n", "H2A | \n", "canonical H2A | \n", "H2AC12 | \n", "85235 | \n", "ENSG00000274997 | \n", "ENST00000377459 | \n", "ENSP00000366679 | \n", "NM_080596 | \n", "NP_542163 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "128 | \n", "12408966 25731851 | \n", "
| 21 | \n", "H2A | \n", "canonical H2A | \n", "H2AC13 | \n", "8329 | \n", "ENSG00000196747 | \n", "ENST00000358739 | \n", "ENSP00000351589 | \n", "NM_003509 | \n", "NP_003500 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 22 | \n", "H2A | \n", "canonical H2A | \n", "H2AC14 | \n", "8331 | \n", "ENSG00000276368 | \n", "ENST00000333151 | \n", "ENSP00000328484 | \n", "NM_021066 | \n", "NP_066544 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "128 | \n", "12408966 25731851 | \n", "
| 23 | \n", "H2A | \n", "canonical H2A | \n", "H2AC15 | \n", "8330 | \n", "ENSG00000275221 | \n", "ENST00000618958 | \n", "ENSP00000482431 | \n", "NM_003510 | \n", "NP_003501 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 24 | \n", "H2A | \n", "canonical H2A | \n", "H2AC16 | \n", "8332 | \n", "ENSG00000276903 | \n", "ENST00000613174 | \n", "ENSP00000482538 | \n", "NM_003511 | \n", "NP_003502 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 25 | \n", "H2A | \n", "canonical H2A | \n", "H2AC17 | \n", "8336 | \n", "ENSG00000278677 | \n", "ENST00000359611 | \n", "ENSP00000352627 | \n", "NM_003514 | \n", "NP_003505 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 26 | \n", "H2A | \n", "canonical H2A | \n", "H2AC18 | \n", "8337 | \n", "ENSG00000203812 | \n", "ENST00000369159 | \n", "ENSP00000358155 | \n", "NM_003516 | \n", "NP_003507 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 27 | \n", "H2A | \n", "canonical H2A | \n", "H2AC19 | \n", "723790 | \n", "ENSG00000272196 | \n", "ENST00000607355 | \n", "ENSP00000475814 | \n", "NM_001040874 | \n", "NP_001035807 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 28 | \n", "H2A | \n", "canonical H2A | \n", "H2AC20 | \n", "8338 | \n", "ENSG00000184260 | \n", "ENST00000331380 | \n", "ENSP00000332194 | \n", "NM_003517 | \n", "NP_003508 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "129 | \n", "12408966 25731851 | \n", "
| 29 | \n", "H2A | \n", "canonical H2A | \n", "H2AC21 | \n", "317772 | \n", "ENSG00000184270 | \n", "ENST00000331128 | \n", "ENSP00000332790 | \n", "NM_175065 | \n", "NP_778235 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "130 | \n", "12408966 25731851 | \n", "
| 30 | \n", "H2A | \n", "H2A.J(?) | \n", "H2AJ | \n", "55766 | \n", "ENSG00000246705 | \n", "ENST00000544848 | \n", "ENSP00000438553 | \n", "NM_177925 | \n", "NP_808760 | \n", "protein_coding | \n", "b'MSGRGKQGGKVRAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERV... | \n", "129 | \n", "25731851 | \n", "
| 33 | \n", "H2A | \n", "canonical H2A | \n", "H2AW | \n", "92815 | \n", "ENSG00000181218 | \n", "ENST00000366695 | \n", "ENSP00000355656 | \n", "NM_033445 | \n", "NP_254280 | \n", "protein_coding | \n", "b'MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYSERV... | \n", "130 | \n", "? | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 112 | \n", "H3 | \n", "H3.Y.1 | \n", "H3Y1 | \n", "391769 | \n", "ENSG00000269466 | \n", "ENST00000598383 | \n", "ENSP00000496014 | \n", "NM_001355258 | \n", "NP_001342187 | \n", "protein_coding | \n", "b'MARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKP... | \n", "136 | \n", "20819935 | \n", "
| 113 | \n", "H3 | \n", "H3.Y.2 | \n", "H3Y2 | \n", "340096 | \n", "ENSG00000268799 | \n", "ENST00000600799 | \n", "ENSP00000497053 | \n", "NM_001371919 | \n", "NP_001358848 | \n", "protein_coding | \n", "b'MARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKP... | \n", "147 | \n", "20819935 | \n", "
| 115 | \n", "H3 | \n", "canonical H3(?) | \n", "H3-2 | \n", "440686 | \n", "ENSG00000273213 | \n", "ENST00000609879 | \n", "ENSP00000499501 | \n", "NM_001355409 | \n", "NP_001342338 | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRP... | \n", "136 | \n", "12408966 | \n", "
| 118 | \n", "H3 | \n", "H3.3 | \n", "H3-3A | \n", "3020 | \n", "ENSG00000163041 | \n", "ENST00000366814 | \n", "ENSP00000355779 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRP... | \n", "123 | \n", "19412883 | \n", "
| 119 | \n", "H3 | \n", "H3.3 | \n", "H3-3A | \n", "3020 | \n", "ENSG00000163041 | \n", "ENST00000366815 | \n", "ENSP00000355780 | \n", "NM_002107 | \n", "NP_002098 | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRP... | \n", "136 | \n", "19412883 | \n", "
| 121 | \n", "H3 | \n", "H3.3 | \n", "H3-3A | \n", "3020 | \n", "ENSG00000163041 | \n", "ENST00000667897 | \n", "ENSP00000499446 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRP... | \n", "120 | \n", "19412883 | \n", "
| 125 | \n", "H3 | \n", "H3.3 | \n", "H3-3B | \n", "3021 | \n", "ENSG00000132475 | \n", "ENST00000254810 | \n", "ENSP00000254810 | \n", "NM_005324 | \n", "NP_005315 | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRP... | \n", "136 | \n", "19412883 | \n", "
| 126 | \n", "H3 | \n", "H3.3 | \n", "H3-3B | \n", "3021 | \n", "ENSG00000132475 | \n", "ENST00000592643 | \n", "ENSP00000467165 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRP... | \n", "113 | \n", "19412883 | \n", "
| 127 | \n", "H3 | \n", "H3.3 | \n", "H3-3B | \n", "3021 | \n", "ENSG00000132475 | \n", "ENST00000591890 | \n", "ENSP00000466663 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRP... | \n", "92 | \n", "19412883 | \n", "
| 131 | \n", "H3 | \n", "H3.3 | \n", "H3-3B | \n", "3021 | \n", "ENSG00000132475 | \n", "ENST00000586270 | \n", "ENSP00000465403 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRP... | \n", "132 | \n", "19412883 | \n", "
| 132 | \n", "H3 | \n", "H3.3 | \n", "H3-3B | \n", "3021 | \n", "ENSG00000132475 | \n", "ENST00000587171 | \n", "ENSP00000468484 | \n", "NaN | \n", "NaN | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRP... | \n", "151 | \n", "19412883 | \n", "
| 133 | \n", "H3 | \n", "TS H3.4 | \n", "H3-4 | \n", "8290 | \n", "ENSG00000168148 | \n", "ENST00000366696 | \n", "ENSP00000355657 | \n", "NM_003493 | \n", "NP_003484 | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKVARKSAPATGGVKKPHRYRP... | \n", "136 | \n", "8986613 | \n", "
| 134 | \n", "H3 | \n", "H3.5 | \n", "H3-5 | \n", "440093 | \n", "ENSG00000188375 | \n", "ENST00000340398 | \n", "ENSP00000339835 | \n", "NM_001013699 | \n", "NP_001013721 | \n", "protein_coding | \n", "b'MARTKQTARKSTGGKAPRKQLATKAARKSTPSTCGVKPHRYRPG... | \n", "135 | \n", "21274551 | \n", "
| 135 | \n", "H3 | \n", "cenH3 | \n", "CENPA | \n", "1058 | \n", "ENSG00000115163 | \n", "ENST00000335756 | \n", "ENSP00000336868 | \n", "NM_001809 | \n", "NP_001800 | \n", "protein_coding | \n", "b'MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRR... | \n", "140 | \n", "23324462 | \n", "
| 136 | \n", "H3 | \n", "cenH3 | \n", "CENPA | \n", "1058 | \n", "ENSG00000115163 | \n", "ENST00000233505 | \n", "ENSP00000233505 | \n", "NM_001042426 | \n", "NP_001035891 | \n", "protein_coding | \n", "b'MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRR... | \n", "114 | \n", "23324462 | \n", "
| 138 | \n", "H4 | \n", "canonical H4 | \n", "H4C1 | \n", "8359 | \n", "ENSG00000278637 | \n", "ENST00000617569 | \n", "ENSP00000479106 | \n", "NM_003538 | \n", "NP_003529 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 139 | \n", "H4 | \n", "canonical H4 | \n", "H4C2 | \n", "8366 | \n", "ENSG00000278705 | \n", "ENST00000377745 | \n", "ENSP00000366974 | \n", "NM_003544 | \n", "NP_003535 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 140 | \n", "H4 | \n", "canonical H4 | \n", "H4C3 | \n", "8364 | \n", "ENSG00000197061 | \n", "ENST00000377803 | \n", "ENSP00000367034 | \n", "NM_003542 | \n", "NP_003533 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 141 | \n", "H4 | \n", "canonical H4 | \n", "H4C4 | \n", "8360 | \n", "ENSG00000277157 | \n", "ENST00000614247 | \n", "ENSP00000479461 | \n", "NM_003539 | \n", "NP_003530 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 142 | \n", "H4 | \n", "canonical H4 | \n", "H4C5 | \n", "8367 | \n", "ENSG00000276966 | \n", "ENST00000615164 | \n", "ENSP00000484789 | \n", "NM_003545 | \n", "NP_003536 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 143 | \n", "H4 | \n", "canonical H4 | \n", "H4C6 | \n", "8361 | \n", "ENSG00000274618 | \n", "ENST00000244537 | \n", "ENSP00000244537 | \n", "NM_003540 | \n", "NP_003531 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 144 | \n", "H4 | \n", "canonical H4 | \n", "H4C7 | \n", "8369 | \n", "ENSG00000275663 | \n", "ENST00000611444 | \n", "ENSP00000477870 | \n", "NM_003547 | \n", "NP_003538 | \n", "protein_coding | \n", "b'MSVRGKAGKGLGKGGAKCHRKVLSDNIQGITKCTIRRLARHGGV... | \n", "98 | \n", "12408966 | \n", "
| 147 | \n", "H4 | \n", "canonical H4 | \n", "H4C8 | \n", "8365 | \n", "ENSG00000158406 | \n", "ENST00000377727 | \n", "ENSP00000366956 | \n", "NM_003543 | \n", "NP_003534 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 149 | \n", "H4 | \n", "canonical H4 | \n", "H4C9 | \n", "8294 | \n", "ENSG00000276180 | \n", "ENST00000615353 | \n", "ENSP00000481486 | \n", "NM_003495 | \n", "NP_003486 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 150 | \n", "H4 | \n", "canonical H4 | \n", "H4C11 | \n", "8363 | \n", "ENSG00000197238 | \n", "ENST00000355057 | \n", "ENSP00000347168 | \n", "NM_021968 | \n", "NP_068803 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 151 | \n", "H4 | \n", "canonical H4 | \n", "H4C12 | \n", "8362 | \n", "ENSG00000273542 | \n", "ENST00000611927 | \n", "ENSP00000479794 | \n", "NM_003541 | \n", "NP_003532 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 152 | \n", "H4 | \n", "canonical H4 | \n", "H4C13 | \n", "8368 | \n", "ENSG00000275126 | \n", "ENST00000618305 | \n", "ENSP00000480960 | \n", "NM_003546 | \n", "NP_003537 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 153 | \n", "H4 | \n", "canonical H4 | \n", "H4C14 | \n", "8370 | \n", "ENSG00000270882 | \n", "ENST00000578186 | \n", "ENSP00000462667 | \n", "NM_003548 | \n", "NP_003539 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 159 | \n", "H4 | \n", "canonical H4 | \n", "H4C15 | \n", "554313 | \n", "ENSG00000270276 | \n", "ENST00000579512 | \n", "ENSP00000462355 | \n", "NM_001034077 | \n", "NP_001029249 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
| 161 | \n", "H4 | \n", "canonical H4 | \n", "H4-16 | \n", "121504 | \n", "ENSG00000197837 | \n", "ENST00000539745 | \n", "ENSP00000443017 | \n", "NM_175054 | \n", "NP_778224 | \n", "protein_coding | \n", "b'MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGV... | \n", "103 | \n", "12408966 | \n", "
120 rows × 13 columns
\n", "