{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "#### Data source:\n", "Lee M, Teber ET, Holmes O, Nones K, Patch AM, Dagg RA, Lau LMS, Lee JH, Napier CE, Arthur JW, Grimmond SM, Hayward NK, Johansson PA, Mann GJ, Scolyer RA, Wilmott JS, Reddel RR, Pearson JV, Waddell N, Pickett HA. \n", "**Telomere sequence content can be used to determine ALT activity in tumours.**\n", "_Nucleic Acids Res._ 2018 Jun 1;46(10):4903-4918." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# STEP 1: Preprocessing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | TTAGGG | \n", "ATAGGG | \n", "CTAGGG | \n", "GTAGGG | \n", "TAAGGG | \n", "TCAGGG | \n", "TGAGGG | \n", "TTCGGG | \n", "TTGGGG | \n", "TTTGGG | \n", "... | \n", "TTACGG | \n", "TTATGG | \n", "TTAGAG | \n", "TTAGCG | \n", "TTAGTG | \n", "TTAGGA | \n", "TTAGGC | \n", "TTAGGT | \n", "rel_TL | \n", "TMM | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "94.846 | \n", "0.019 | \n", "0.430 | \n", "0.422 | \n", "0.216 | \n", "0.544 | \n", "1.762 | \n", "0.535 | \n", "0.338 | \n", "0.068 | \n", "... | \n", "0.028 | \n", "0.118 | \n", "0.153 | \n", "0.000 | \n", "0.049 | \n", "0.060 | \n", "0.033 | \n", "0.089 | \n", "-0.89 | \n", "- | \n", "
| 1 | \n", "94.951 | \n", "0.011 | \n", "0.241 | \n", "0.491 | \n", "0.223 | \n", "0.317 | \n", "1.351 | \n", "0.818 | \n", "0.702 | \n", "0.090 | \n", "... | \n", "0.024 | \n", "0.125 | \n", "0.080 | \n", "0.024 | \n", "0.035 | \n", "0.155 | \n", "0.030 | \n", "0.093 | \n", "-0.39 | \n", "- | \n", "
| 2 | \n", "94.889 | \n", "0.043 | \n", "0.439 | \n", "0.478 | \n", "0.355 | \n", "0.316 | \n", "1.151 | \n", "0.625 | \n", "0.313 | \n", "0.079 | \n", "... | \n", "0.041 | \n", "0.253 | \n", "0.195 | \n", "0.032 | \n", "0.043 | \n", "0.161 | \n", "0.047 | \n", "0.185 | \n", "-1.66 | \n", "- | \n", "
| 3 | \n", "94.202 | \n", "0.017 | \n", "0.252 | \n", "0.509 | \n", "0.396 | \n", "0.548 | \n", "1.877 | \n", "0.856 | \n", "0.440 | \n", "0.097 | \n", "... | \n", "0.053 | \n", "0.110 | \n", "0.125 | \n", "0.000 | \n", "0.043 | \n", "0.069 | \n", "0.029 | \n", "0.110 | \n", "-1.73 | \n", "- | \n", "
| 4 | \n", "96.368 | \n", "0.011 | \n", "0.078 | \n", "0.131 | \n", "0.015 | \n", "0.306 | \n", "1.525 | \n", "1.165 | \n", "0.126 | \n", "0.000 | \n", "... | \n", "0.014 | \n", "0.099 | \n", "0.022 | \n", "0.000 | \n", "0.019 | \n", "0.026 | \n", "0.009 | \n", "0.014 | \n", "0.21 | \n", "- | \n", "
| 5 | \n", "98.843 | \n", "0.001 | \n", "0.112 | \n", "0.179 | \n", "0.000 | \n", "0.073 | \n", "0.285 | \n", "0.280 | \n", "0.094 | \n", "0.000 | \n", "... | \n", "0.009 | \n", "0.011 | \n", "0.045 | \n", "0.000 | \n", "0.002 | \n", "0.014 | \n", "0.010 | \n", "0.003 | \n", "0.56 | \n", "- | \n", "
| 6 | \n", "97.041 | \n", "0.002 | \n", "0.209 | \n", "0.324 | \n", "0.200 | \n", "0.391 | \n", "0.640 | \n", "0.353 | \n", "0.257 | \n", "0.041 | \n", "... | \n", "0.014 | \n", "0.066 | \n", "0.089 | \n", "0.000 | \n", "0.017 | \n", "0.086 | \n", "0.016 | \n", "0.041 | \n", "0.01 | \n", "- | \n", "
| 7 | \n", "93.687 | \n", "0.034 | \n", "0.444 | \n", "0.651 | \n", "0.463 | \n", "0.655 | \n", "1.347 | \n", "0.836 | \n", "0.550 | \n", "0.133 | \n", "... | \n", "0.038 | \n", "0.136 | \n", "0.191 | \n", "0.046 | \n", "0.034 | \n", "0.160 | \n", "0.033 | \n", "0.099 | \n", "-0.79 | \n", "- | \n", "
| 8 | \n", "97.500 | \n", "0.015 | \n", "0.149 | \n", "0.264 | \n", "0.078 | \n", "0.238 | \n", "0.917 | \n", "0.184 | \n", "0.206 | \n", "0.038 | \n", "... | \n", "0.019 | \n", "0.086 | \n", "0.060 | \n", "0.000 | \n", "0.029 | \n", "0.045 | \n", "0.024 | \n", "0.031 | \n", "-0.83 | \n", "- | \n", "
| 9 | \n", "97.110 | \n", "0.001 | \n", "0.223 | \n", "0.303 | \n", "0.166 | \n", "0.293 | \n", "0.729 | \n", "0.382 | \n", "0.253 | \n", "0.026 | \n", "... | \n", "0.026 | \n", "0.036 | \n", "0.148 | \n", "0.000 | \n", "0.017 | \n", "0.059 | \n", "0.019 | \n", "0.029 | \n", "-0.02 | \n", "- | \n", "
10 rows × 21 columns
\n", "| \n", " | TTAGGG | \n", "ATAGGG | \n", "CTAGGG | \n", "GTAGGG | \n", "TAAGGG | \n", "TCAGGG | \n", "TGAGGG | \n", "TTCGGG | \n", "TTGGGG | \n", "TTTGGG | \n", "TTAAGG | \n", "TTACGG | \n", "TTATGG | \n", "TTAGAG | \n", "TTAGCG | \n", "TTAGTG | \n", "TTAGGA | \n", "TTAGGC | \n", "TTAGGT | \n", "rel_TL | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "98.621 | \n", "0.002 | \n", "0.029 | \n", "0.184 | \n", "0.000 | \n", "0.817 | \n", "0.130 | \n", "0.079 | \n", "0.065 | \n", "0.000 | \n", "0.013 | \n", "0.007 | \n", "0.010 | \n", "0.008 | \n", "0.000 | \n", "0.007 | \n", "0.016 | \n", "0.009 | \n", "0.003 | \n", "2.00 | \n", "
| 1 | \n", "94.300 | \n", "0.033 | \n", "0.448 | \n", "0.651 | \n", "0.338 | \n", "0.462 | \n", "1.052 | \n", "0.678 | \n", "0.628 | \n", "0.110 | \n", "0.263 | \n", "0.061 | \n", "0.120 | \n", "0.191 | \n", "0.053 | \n", "0.056 | \n", "0.328 | \n", "0.046 | \n", "0.182 | \n", "-1.01 | \n", "
| 2 | \n", "98.666 | \n", "0.002 | \n", "0.073 | \n", "0.163 | \n", "0.000 | \n", "0.076 | \n", "0.334 | \n", "0.415 | \n", "0.075 | \n", "0.000 | \n", "0.023 | \n", "0.010 | \n", "0.014 | \n", "0.053 | \n", "0.000 | \n", "0.009 | \n", "0.065 | \n", "0.013 | \n", "0.007 | \n", "0.81 | \n", "
| 3 | \n", "97.384 | \n", "0.008 | \n", "0.275 | \n", "0.425 | \n", "0.186 | \n", "0.277 | \n", "0.548 | \n", "0.156 | \n", "0.185 | \n", "0.012 | \n", "0.113 | \n", "0.034 | \n", "0.110 | \n", "0.095 | \n", "0.000 | \n", "0.036 | \n", "0.081 | \n", "0.036 | \n", "0.037 | \n", "0.00 | \n", "
| 4 | \n", "96.525 | \n", "0.026 | \n", "0.204 | \n", "0.230 | \n", "0.209 | \n", "0.417 | \n", "0.934 | \n", "0.197 | \n", "0.397 | \n", "0.168 | \n", "0.193 | \n", "0.026 | \n", "0.070 | \n", "0.093 | \n", "0.003 | \n", "0.034 | \n", "0.158 | \n", "0.050 | \n", "0.066 | \n", "-1.04 | \n", "
| 5 | \n", "96.150 | \n", "0.067 | \n", "0.273 | \n", "0.307 | \n", "0.252 | \n", "0.355 | \n", "1.061 | \n", "0.275 | \n", "0.444 | \n", "0.078 | \n", "0.157 | \n", "0.020 | \n", "0.092 | \n", "0.108 | \n", "0.021 | \n", "0.022 | \n", "0.219 | \n", "0.033 | \n", "0.067 | \n", "-0.13 | \n", "