{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Volumes/Data/Sam/scratch\n"
     ]
    }
   ],
   "source": [
    "# Work from the scratch directory; later cells use paths relative to it.\n",
    "%cd /Volumes/Data/Sam/scratch/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Quality trim & remove first 39 bp from a single FASTQ file"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Code explanation:\n",
    "\n",
    "    java -jar /usr/local/bioinformatics/Trimmomatic-0.30/trimmomatic-0.30.jar\n",
    "The line above launches Trimmomatic, which is given the following arguments; the trimming steps are applied in the order listed:\n",
    "\n",
    "-single end reads (SE)\n",
    "    \n",
    "-number of threads (-threads 16),\n",
    "    \n",
    "-type of quality score (-phred33),\n",
    "\n",
    "-input file location (/Volumes/nightingales/C_gigas/2212_lane2_CTTGTA_L002_R1_001.fastq.gz),\n",
    "\n",
    "-output file name/location (20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz),\n",
    "\n",
    "-single end Illumina TruSeq adaptor trimming (ILLUMINACLIP:/usr/local/bioinformatics/Trimmomatic-0.30/adapters/TruSeq3-SE.fa:2:30:10),\n",
    "\n",
    "-remove the first 39 bases of each read (HEADCROP:39)\n",
    "\n",
    "-remove bases from the start of each read while their quality is below 3 (LEADING:3)\n",
    "\n",
    "-remove bases from the end of each read while their quality is below 3 (TRAILING:3)\n",
    "\n",
    "-scan each read with a 4-base sliding window and cut the read once the average quality within the window falls below 15 (SLIDINGWINDOW:4:15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TrimmomaticSE: Started with arguments: -threads 16 -phred33 /Volumes/nightingales/C_gigas/2212_lane2_CTTGTA_L002_R1_001.fastq.gz /Volumes/Data/Sam/scratch/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz ILLUMINACLIP:/usr/local/bioinformatics/Trimmomatic-0.30/adapters/TruSeq3-SE.fa:2:30:10 HEADCROP:39 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15\n",
      "Using Long Clipping Sequence: 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA'\n",
      "Using Long Clipping Sequence: 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'\n",
      "ILLUMINACLIP: Using 0 prefix pairs, 2 forward/reverse sequences, 0 forward only sequences, 0 reverse only sequences\n",
      "Input Reads: 16000000 Surviving: 15796545 (98.73%) Dropped: 203455 (1.27%)\n",
      "TrimmomaticSE: Completed successfully\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "# Single-end Trimmomatic run: adapter clipping, 39 bp head crop, then quality trimming.\n",
    "trimmomatic_home=/usr/local/bioinformatics/Trimmomatic-0.30\n",
    "raw_reads=/Volumes/nightingales/C_gigas/2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
    "trimmed_reads=/Volumes/Data/Sam/scratch/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
    "\n",
    "# Arguments are passed in the same order as before, so the trimming steps keep their sequence.\n",
    "java -jar \"${trimmomatic_home}/trimmomatic-0.30.jar\" SE \\\n",
    "    -threads 16 -phred33 \\\n",
    "    \"${raw_reads}\" \\\n",
    "    \"${trimmed_reads}\" \\\n",
    "    \"ILLUMINACLIP:${trimmomatic_home}/adapters/TruSeq3-SE.fa:2:30:10\" \\\n",
    "    HEADCROP:39 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### FastQC on trimmed file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Analysis complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Started analysis of 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 5% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 10% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 15% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 20% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 25% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 30% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 35% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 40% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 45% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 50% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 55% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 60% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 65% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 70% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 75% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 80% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 85% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 90% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n",
      "Approx 95% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "# Run FastQC on the trimmed reads; the report is written to the Eagle volume.\n",
    "fastqc \\\n",
    "    --outdir=/Volumes/Eagle/Arabidopsis/ \\\n",
    "    /Volumes/Data/Sam/scratch/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Copy files to Eagle for web-based access"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "# Use the absolute scratch path so the copy does not depend on the kernel's\n",
    "# current working directory (i.e. on which cells were run beforehand).\n",
    "cp /Volumes/Data/Sam/scratch/20150506_* /Volumes/Eagle/Arabidopsis/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Unzip FastQC output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Archive:  /Volumes/Eagle/Arabidopsis/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc.zip\n",
      "   creating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/\n",
      "   creating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Icons/\n",
      "   creating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/\n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Icons/fastqc_icon.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Icons/warning.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Icons/error.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Icons/tick.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/summary.txt  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_base_quality.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_tile_quality.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_sequence_quality.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_base_sequence_content.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_sequence_gc_content.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_base_n_content.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/sequence_length_distribution.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/duplication_levels.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/adapter_content.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/kmer_profiles.png  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/fastqc_report.html  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/fastqc_data.txt  \n",
      "  inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/fastqc.fo  \n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "# Extract the FastQC report archive into the current working directory.\n",
    "fastqc_zip=/Volumes/Eagle/Arabidopsis/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc.zip\n",
    "unzip \"${fastqc_zip}\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Move unzipped folder to Eagle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "# Relocate the extracted report folder from the working directory to Eagle.\n",
    "report_dir=20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/\n",
    "mv \"${report_dir}\" /Volumes/Eagle/Arabidopsis/"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}