{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Notebook for getting a kallisto count matrix for the individual crab RNAseq libraries using crab transcriptome v3.1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Note: The individual crab RNAseq was not used in the assembly of crab transcriptome v3.1\n", "I will only be using the samples from a single crab (ambient, infected) sampled over time. Library IDs: 178 (day 0), 359 (day 2), and 463 (day 17)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### This notebook is based on this one from Steven Roberts: https://github.com/sr320/nb-2020/blob/master/C_bairdi/20-kallisto.ipynb" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "kallisto 0.46.1\r\n", "\r\n", "Usage: kallisto [arguments] ..\r\n", "\r\n", "Where can be one of:\r\n", "\r\n", " index Builds a kallisto index \r\n", " quant Runs the quantification algorithm \r\n", " bus Generate BUS files for single-cell data \r\n", " pseudo Runs the pseudoalignment step \r\n", " merge Merges several batch runs \r\n", " h5dump Converts HDF5-formatted results to plaintext\r\n", " inspect Inspects and gives information about an index\r\n", " version Prints version information\r\n", " cite Prints citation information\r\n", "\r\n", "Running kallisto without arguments prints usage information for \r\n", "\r\n" ] } ], "source": [ "!/Applications/bioinfo/kallisto/kallisto" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/graciecrandall/Documents/GitHub/paper-tanner-crab/notebooks\r\n" ] } ], "source": [ "!pwd" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cbai_transcriptome_31\r\n", "cbai_transcriptome_v3.1.fasta\r\n", "\u001b[34mkallisto\u001b[m\u001b[m/\r\n", "kallisto-4libraries.ipynb\r\n", 
"kallisto-individual-crab.ipynb\r\n", "readme.md\r\n", "transcriptomev3.1-BLAST-to-GOslim.ipynb\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2020-08-15 12:57:37-- https://owl.fish.washington.edu/halfshell/genomic-databank/cbai_transcriptome_v3.1.fasta\n", "Resolving owl.fish.washington.edu (owl.fish.washington.edu)... 128.95.149.83\n", "Connecting to owl.fish.washington.edu (owl.fish.washington.edu)|128.95.149.83|:443... connected.\n", "WARNING: cannot verify owl.fish.washington.edu's certificate, issued by ‘CN=InCommon RSA Server CA,OU=InCommon,O=Internet2,L=Ann Arbor,ST=MI,C=US’:\n", " Unable to locally verify the issuer's authority.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 145648199 (139M)\n", "Saving to: ‘cbai_transcriptome_v3.1.fasta.1’\n", "\n", "cbai_transcriptome_ 100%[===================>] 138.90M 19.1MB/s in 11s \n", "\n", "2020-08-15 12:57:48 (13.0 MB/s) - ‘cbai_transcriptome_v3.1.fasta.1’ saved [145648199/145648199]\n", "\n" ] } ], "source": [ "!wget --no-check-certificate https://owl.fish.washington.edu/halfshell/genomic-databank/cbai_transcriptome_v3.1.fasta" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "[build] loading fasta file cbai_transcriptome_v3.1.fasta\n", "[build] k-mer length: 31\n", "[build] warning: clipped off poly-A tail (longer than 10)\n", " from 12560 target sequences\n", "[build] counting k-mers ... done.\n", "[build] building target de Bruijn graph ... done \n", "[build] creating equivalence classes ... 
done\n", "[build] target de Bruijn graph has 303833 contigs and contains 43881871 k-mers \n", "\n" ] } ], "source": [ "!/Applications/bioinfo/kallisto/kallisto \\\n", "index -i cbai_transcriptome_31 \\\n", "cbai_transcriptome_v3.1.fasta" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "[quant] fragment length distribution will be estimated from the data\n", "[index] k-mer length: 31\n", "[index] number of targets: 78,649\n", "[index] number of k-mers: 43,881,871\n", "[index] number of equivalence classes: 213,585\n", "[quant] running in paired-end mode\n", "[quant] will process pair 1: /Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/178_R1_001.fastp-trim.202003181815.fq.gz\n", " /Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/178_R2_001.fastp-trim.202003181815.fq.gz\n", "[quant] finding pseudoalignments for the reads ... done\n", "[quant] processed 29,009,909 reads, 19,091,104 reads pseudoaligned\n", "[quant] estimated average fragment length: 122.091\n", "[ em] quantifying the abundances ... 
done\n", "[ em] the Expectation-Maximization algorithm ran for 1,141 rounds\n", "\n" ] } ], "source": [ "# Quantify library 178 (single crab: ambient infected - day 0) with kallisto.\n", "# The index is given relative to the notebook directory, matching the relative\n", "# -o output path, so the cell does not hard-code a user-specific home path.\n", "# The two positional arguments are the trimmed R1/R2 read files for this library.\n", "!/Applications/bioinfo/kallisto/kallisto quant \\\n", "-i cbai_transcriptome_31 \\\n", "-t 4 \\\n", "-o kallisto/178_ambient_infected_0/ \\\n", "/Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/178_R1_001.fastp-trim.202003181815.fq.gz /Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/178_R2_001.fastp-trim.202003181815.fq.gz" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "[quant] fragment length distribution will be estimated from the data\n", "[index] k-mer length: 31\n", "[index] number of targets: 78,649\n", "[index] number of k-mers: 43,881,871\n", "[index] number of equivalence classes: 213,585\n", "[quant] running in paired-end mode\n", "[quant] will process pair 1: /Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/359_R1_001.fastp-trim.202003182247.fq.gz\n", " /Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/359_R2_001.fastp-trim.202003182247.fq.gz\n", "[quant] finding pseudoalignments for the reads ... done\n", "[quant] processed 27,021,012 reads, 16,787,891 reads pseudoaligned\n", "[quant] estimated average fragment length: 124.407\n", "[ em] quantifying the abundances ... 
done\n", "[ em] the Expectation-Maximization algorithm ran for 1,223 rounds\n", "\n" ] } ], "source": [ "# Quantify library 359 (single crab: ambient infected - day 2) with kallisto.\n", "# The index is given relative to the notebook directory, matching the relative\n", "# -o output path, so the cell does not hard-code a user-specific home path.\n", "# The two positional arguments are the trimmed R1/R2 read files for this library.\n", "!/Applications/bioinfo/kallisto/kallisto quant \\\n", "-i cbai_transcriptome_31 \\\n", "-t 4 \\\n", "-o kallisto/359_ambient_infected_2/ \\\n", "/Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/359_R1_001.fastp-trim.202003182247.fq.gz /Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/359_R2_001.fastp-trim.202003182247.fq.gz" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "[quant] fragment length distribution will be estimated from the data\n", "[index] k-mer length: 31\n", "[index] number of targets: 78,649\n", "[index] number of k-mers: 43,881,871\n", "[index] number of equivalence classes: 213,585\n", "[quant] running in paired-end mode\n", "[quant] will process pair 1: /Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/463_R1_001.fastp-trim.202003185732.fq.gz\n", " /Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/463_R2_001.fastp-trim.202003185732.fq.gz\n", "[quant] finding pseudoalignments for the reads ... done\n", "[quant] processed 19,061,045 reads, 8,884,132 reads pseudoaligned\n", "[quant] estimated average fragment length: 77.2951\n", "[ em] quantifying the abundances ... 
done\n", "[ em] the Expectation-Maximization algorithm ran for 1,402 rounds\n", "\n" ] } ], "source": [ "# Quantify library 463 (single crab: ambient infected - day 17) with kallisto.\n", "# The index is given relative to the notebook directory, matching the relative\n", "# -o output path, so the cell does not hard-code a user-specific home path.\n", "# The two positional arguments are the trimmed R1/R2 read files for this library.\n", "!/Applications/bioinfo/kallisto/kallisto quant \\\n", "-i cbai_transcriptome_31 \\\n", "-t 4 \\\n", "-o kallisto/463_ambient_infected_17/ \\\n", "/Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/463_R1_001.fastp-trim.202003185732.fq.gz /Volumes/web/Atumefaciens/20200318_cbai_RNAseq_fastp_trimming/463_R2_001.fastp-trim.202003185732.fq.gz" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create matrix" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "####################################################################################\r\n", "#\r\n", "# Usage: /Applications/bioinfo/trinityrnaseq-v2.8.6/util/abundance_estimates_to_matrix.pl --est_method sample1.results sample2.results ...\r\n", "#\r\n", "# or /Applications/bioinfo/trinityrnaseq-v2.8.6/util/abundance_estimates_to_matrix.pl --est_method --quant_files file.listing_target_files.txt\r\n", "#\r\n", "# Note, if only a single input file is given, it's expected to contain the paths to all the target abundance estimation files.\r\n", "#\r\n", "# Required:\r\n", "# \r\n", "# --est_method RSEM|eXpress|kallisto|salmon (needs to know what format to expect)\r\n", "#\r\n", "# --gene_trans_map the gene-to-transcript mapping file. 
(if you don't want gene estimates, indicate 'none'.\r\n", "#\r\n", "#\r\n", "# Options:\r\n", "#\r\n", "# --cross_sample_norm TMM|UpperQuartile|none (default: TMM)\r\n", "#\r\n", "# --name_sample_by_basedir name sample column by dirname instead of filename\r\n", "# --basedir_index default(-2)\r\n", "#\r\n", "# --out_prefix default: value for --est_method\r\n", "#\r\n", "# --quant_files file containing a list of all the target files.\r\n", "#\r\n", "######################################################################################\r\n", "\r\n", "\r\n" ] } ], "source": [ "!perl /Applications/bioinfo/trinityrnaseq-v2.8.6/util/abundance_estimates_to_matrix.pl" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-reading file: /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/notebooks/kallisto/178_ambient_infected_0/abundance.tsv\n", "-reading file: /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/notebooks/kallisto/359_ambient_infected_2/abundance.tsv\n", "-reading file: /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/notebooks/kallisto/463_ambient_infected_17/abundance.tsv\n", "\n", "\n", "* Outputting combined matrix.\n", "\n", "/Applications/bioinfo/trinityrnaseq-v2.8.6/util/support_scripts/run_TMM_scale_matrix.pl --matrix /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/analyses/kallisto-single_crab_over_time/kallisto-single-crab.isoform.TPM.not_cross_norm > /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/analyses/kallisto-single_crab_over_time/kallisto-single-crab.isoform.TMM.EXPR.matrixCMD: R --no-save --no-restore --no-site-file --no-init-file -q < /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/analyses/kallisto-single_crab_over_time/kallisto-single-crab.isoform.TPM.not_cross_norm.runTMM.R 1>&2 \n", "dyld: Library not loaded: @rpath/libreadline.6.2.dylib\n", " Referenced from: 
/Users/graciecrandall/anaconda/lib/R/lib/libR.dylib\n", " Reason: image not found\n", "sh: line 1: 808 Abort trap: 6 R --no-save --no-restore --no-site-file --no-init-file -q < /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/analyses/kallisto-single_crab_over_time/kallisto-single-crab.isoform.TPM.not_cross_norm.runTMM.R 1>&2\n", "Error, cmd: R --no-save --no-restore --no-site-file --no-init-file -q < /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/analyses/kallisto-single_crab_over_time/kallisto-single-crab.isoform.TPM.not_cross_norm.runTMM.R 1>&2 died with ret (34304) at /Applications/bioinfo/trinityrnaseq-v2.8.6/util/support_scripts/run_TMM_scale_matrix.pl line 105.\n", "Error, CMD: /Applications/bioinfo/trinityrnaseq-v2.8.6/util/support_scripts/run_TMM_scale_matrix.pl --matrix /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/analyses/kallisto-single_crab_over_time/kallisto-single-crab.isoform.TPM.not_cross_norm > /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/analyses/kallisto-single_crab_over_time/kallisto-single-crab.isoform.TMM.EXPR.matrix died with ret 6400 at /Applications/bioinfo/trinityrnaseq-v2.8.6/util/abundance_estimates_to_matrix.pl line 385.\n" ] } ], "source": [ "!perl /Applications/bioinfo/trinityrnaseq-v2.8.6/util/abundance_estimates_to_matrix.pl \\\n", "--est_method kallisto \\\n", " --gene_trans_map none \\\n", " --out_prefix /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/analyses/kallisto-single_crab_over_time/kallisto-single-crab \\\n", " --name_sample_by_basedir \\\n", " /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/notebooks/kallisto/178_ambient_infected_0/abundance.tsv \\\n", " /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/notebooks/kallisto/359_ambient_infected_2/abundance.tsv \\\n", " /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/notebooks/kallisto/463_ambient_infected_17/abundance.tsv " ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\t178_ambient_infected_0\t359_ambient_infected_2\t463_ambient_infected_17\r\n", "TRINITY_DN10815_c0_g1_i1\t0\t0\t0\r\n", "TRINITY_DN1636_c2_g1_i11\t0\t20.5709\t0\r\n", "TRINITY_DN23821_c0_g1_i2\t0\t1.16885\t0\r\n", "TRINITY_DN446_c0_g1_i7\t0\t0\t0\r\n", "TRINITY_DN112563_c0_g1_i1\t1\t1\t0\r\n", "TRINITY_DN3547_c2_g1_i14\t1.67035e-08\t0\t0\r\n", "TRINITY_DN22245_c0_g1_i1\t2.74956\t0\t9.72359\r\n", "TRINITY_DN91454_c0_g2_i1\t0\t0\t0\r\n", "TRINITY_DN8799_c0_g1_i6\t0\t0\t0\r\n" ] } ], "source": [ "!head /Users/graciecrandall/Documents/GitHub/paper-tanner-crab/analyses/kallisto-single_crab_over_time/kallisto-single-crab.isoform.counts.matrix" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.6" } }, "nbformat": 4, "nbformat_minor": 2 }