# how to make a subsample of a full sample - here how to get 1 mill reads from a larger sample # structure of command: seqtk sample -s12345 read2.fq 1000000 >sub2.fq # the -s###### is a random seed number used for the stochastic selection of reads and must be identical for both R1 and R2. seqtk sample -s12345 4day_HK1_F_R1_trimmed_paired.fastq 1000000 > 4day_HK1_F_R1_trimmed_subsample_paired.fastq # tophat mapping (bowtie wrapped) -o specifies output directory, -p 2 is the number of CPUs. Remember that the index has to be in your current directory! or do export path tophat2 -o 6hrs_HK1_K \ -p 2 \ --transcriptome-index=gadMor2_ena gadMor2_ena \ ~/INFBIO/trimmed_data/6hrs_HK1_K_R1_trimmed_subsample_paired.fastq \ ~/INFBIO/trimmed_data/6hrs_HK1_K_R2_trimmed_subsample_paired.fastq \ 1>~/INFBIO/analysis/6hrs_HK1_K.out 2>~/INFBIO/analysis/6hrs_HK1_K.err # prediction of transcripts with cufflinks here using 2 cpus, specifying output dir and giving it the output from tophat (a bam file). cufflinks -p 2 -o 4day_HK1_F_cuff 4day_HK1_F/accepted_hits.bam 1> 4day_HK1_F_cuff.out 2> 4day_HK1_F_cuff.err #before running cuffmerge you need to make a list with all the gtf files to be merged and where they are located. I suggest you use the full path if shortcuts are unfamiliar to you #content should look like this: #./6hrs_HK1_K_cuff/transcripts.gtf #then merge with cuffmerge cuffmerge -g ~/INFBIO/transcriptome/gadMor2_ena.gff -s ~/INFBIO/transcriptome/gadMor2_ena.fa -p 2 assembly_list 1>cuffmerge.out 2>cuffmerge.err #we did not actually do cuffdiff due to computation time but it would have looked like this: cuffdiff -o 6hrs_diff_out -b /node/work1/no_backup/monica/reference_genome_based_transcriptome/gadMor2_ena.fa -p 30 \ -u merged_asm/merged.gtf \ 6hrs_HK1_K/accepted_hits.bam,6hrs_HK2_K/accepted_hits.bam,6hrs_HK3_K/accepted_hits.bam,6hrs_HK4_K/accepted_hits.bam,6hrs_HK5_K/accepted_hits.bam,6hrs_HK6_K/accepted_hits.bam \ 6hrs_HK1_F/accepted_hits.bam,6hrs_HK2_F/accepted_hits.bam,6hrs_HK3_F/accepted_hits.bam,6hrs_HK4_F/accepted_hits.bam,6hrs_HK5_F/accepted_hits.bam,6hrs_HK6_F/accepted_hits.bam \ 1>cuffdiff6hrs.out 2>cuffdiff6hrs.err #if you want change the q1/q2 labels after running cuffdiff (i.e. forgot to add labels to command) you can do this in the cuffdiff directory: sed -i.bak -e 's/q1/6hrsC/g;s/q2/6hrsF/g;' *.diff *.info *_tracking # for a simple comparison sed -i.bak -e 's/q1/6hrsC/g;s/q2/6hrsF/g;s/q3/4dayC/g;s/q4/4dayF/g;' *.diff *.info *_tracking # for both timepoints run together ############### fastagrep -p gamo_######## gadMor2.......transcripts.fasta