barplot.bam <- function(vdf=NULL, minlen=1, maxlen=37, poscol="red", negcol="green", main="Sequence length distribution", xlab="Map length", ylab="Count", legend=c("+ve strand","-ve strand"), legendx=NULL, legendy=NULL, plot=TRUE, down=FALSE, sym.axes=TRUE, ...) {


	# count frequencies in the negative strand
	neg <- .sum.bam.length(vdf, "-", minlen, maxlen)
	

	# count frequencies on the positive strand
	pos <- .sum.bam.length(vdf, "+", minlen, maxlen)
	
	# join the two on column "length" and give the result
	# sensible column names
	bavdf <- merge(pos, neg, by="length", sort=FALSE)
	colnames(bavdf) <- c("lgth","pos","neg")

	# if the user wants a plot
	if (plot==TRUE) {


		# draw the barplot of pos and neg frequencies
		# and with the user specified options
		if (down==TRUE) {

			# if the user wants symmetrical y-axes
			if (sym.axes==TRUE) {
				miny <- -1 * max(bavdf[,2:3])
				maxy <- max(bavdf[,2:3])
			} else {
				# else let the data decide
				miny <- -1 * max(bavdf[,3])
				maxy <- max(bavdf[,2])
			}

			# plot the poistive counts
			barplot(as.matrix(t(bavdf[,2])), beside=TRUE, names=bavdf[,1], col=c(poscol), main=main, xlab=xlab, ylab=ylab, ylim=c(miny,maxy), ...)

			# add zero minus the negative counts
			barplot(as.matrix(0-t(bavdf[,3])), beside=TRUE, col=c(negcol), add=TRUE, ...)
		} else {
			barplot(as.matrix(t(bavdf[,2:3])), beside=TRUE, names=bavdf[,1], col=c(poscol,negcol), main=main, xlab=xlab, ylab=ylab, ...)
		}


		# set defaults for legendx and legendy
		# if no values are provided
		if (is.null(legendx)) {
			legendx <- 5
		}

		if (is.null(legendy)) {
			legendy <- max(bavdf[,2:3])/2
		}


		# draw the legend
		legend(legendx,legendy,legend=legend,fill=c(poscol,negcol))
	}

	# return the frequencies
	return(bavdf)
	
}


.sum.bam.length <- function(vdf=NULL, str=NULL, minlen=NULL, maxlen=NULL) {

	# first limit the input to the strand we are interested in
	vdf <- vdf[vdf$strand==str,]

	# count the occurrence of each length of read
	# and convert to a data.frame
	tbl <- table(vdf$cliplen, useNA="always")
	tvdf <- as.data.frame(tbl)

	# The above may not contain every number between
	# minlen and maxlen, so we create a dummy
	# data.frame that does and "outer join" it 
	# to the above result, replacing the resulting
	# NA with zero counts
	yvdf <- data.frame(Counts=minlen:maxlen)
	avdf <- merge(yvdf, tvdf, by.x="Counts", by.y="Var1", all.x=TRUE, sort=FALSE)
	avdf[is.na(avdf)] <- 0

	# give it sensible column and row names
	colnames(avdf) <- c("length","freq")
	rownames(avdf) <- avdf$length
	
	# return 
	return(avdf[order(avdf$length),])
}

clip.bam <- function(vdf = NULL) {

	# set up extra columns with null values
	vdf$softclip <- rep(0,nrow(vdf))
	vdf$leftclip <- rep(0,nrow(vdf))
	vdf$rightclip <- rep(0,nrow(vdf))
	vdf$clipseq   <- as.vector(vdf$seq)

	# we take a copy of the cigar string
	# and remove hard clipping information
	# for convenience
	vdf$cigar2 <- gsub("\\d+H","",vdf$cigar)

	# Record the column indices for cigar columns
	cidx <- grep("cigar", colnames(vdf))[1]
	cidx2 <- grep("cigar", colnames(vdf))[2]

	# get the subset of rows with soft-clipping
	clipr <- grep("S", vdf$cigar)
	svdf <- vdf[clipr,]

	# calculate the total number of soft clipped bases
	# by summing up numbers that are adjacent to an S
	# in the cigar string
	svdf$softclip  <- apply(svdf,1,function(x){sum(as.numeric(strapply(as.character(x[cidx]),"(\\d+)S", simplify=c)))})

	# calculate the number of bases soft clipped from 
	# the left of the read by extracting the number
	# adjacent to any S at the beginning of the string
	svdf$leftclip  <- apply(svdf,1,function(x){sum(as.numeric(strapply(as.character(x[cidx2]),"^(\\d+)S",simplify=c)))})

	# calculate the number of bases soft clipped from 
	# the right of the read by extracting the number
	# adjacent to any S at the end of the string
	svdf$rightclip <- apply(svdf,1,function(x){sum(as.numeric(strapply(as.character(x[cidx2]),"(\\d+)S$",simplify=c)))})

	# extract the sequence in the middle of leftclip and rightclip
	# this is the sequence that has actually aligned
	svdf$clipseq   <- substr(svdf$seq, as.numeric(svdf$leftclip)+1, nchar(as.vector(svdf$seq))-as.numeric(svdf$rightclip))

	# substitute in the values
	vdf$softclip[clipr] <- svdf$softclip
	vdf$leftclip[clipr] <- svdf$leftclip
	vdf$rightclip[clipr] <- svdf$rightclip
	vdf$clipseq[clipr] <- svdf$clipseq

	# calculate the length of the clipped sequence
	vdf$cliplen <- nchar(vdf$clipseq)

	# remove the second cigar string
	# we calculated above
	vdf <- vdf[,-cidx2]

	# return the data
	return(vdf)

}


dna.complement <- function(x=NULL, reverse=TRUE) {

	# convert the sequence to upper case
	x <- toupper(x)

	# split sequence into a vector
	vec <- strsplit(x,split="")[[1]]

	# prepare a vector to hold the
	# revcom data
	cvec <- vector(mode="character",length(vec))

	# for each base....
	for (i in 1:length(vec)) {

		# complement the relevant bases
		if (vec[i] == "A") {
			cvec[i] <- "T"
		}
		if (vec[i] == "G") {
			cvec[i] <- "C"
		}
		if (vec[i] == "C") {
			cvec[i] <- "G"
		}
		if (vec[i] == "T") {
			cvec[i] <- "A"
		}
		if (vec[i] == "U") {
			cvec[i] <- "A"
		}
		if (vec[i] == "N") {
			cvec[i] <- "N"
		}
	}

	# reverse if requested (default)
	if (reverse == TRUE) {
		cvec <- rev(cvec)
	}

	# return the sequence as a string
	return(paste(cvec, sep="", collapse=""))
}


make.pwm <- function(vdf=NULL, minlen=1, maxlen=37, scaled=TRUE, strand="pos", revcom=FALSE, ttou=FALSE) {


	# set the defaults strand and change it
	# if we want to look at the negative strand
	strnum <- "+"
	if (strand == "neg") {
		strnum <- "-"
		
		# negative strand reads need to be reverse
		# complemented as BAM always reports alignments
		# to the positive strand
		revcom <- TRUE
	} 

	# filter the data.frame according to the input
	myvdf <- vdf[vdf$cliplen>=minlen&vdf$cliplen<=maxlen&vdf$strand==strnum,]


	# create a list to hold the output
	lst <- list()
	lst[["A"]] <- vector(mode="numeric", length=maxlen)
	lst[["C"]] <- vector(mode="numeric", length=maxlen)
	lst[["G"]] <- vector(mode="numeric", length=maxlen)
	lst[["T"]] <- vector(mode="numeric", length=maxlen)

	# iterate through the aligned sequences
	# in the filtered input
	for (s in as.vector(myvdf$clipseq)) {

		# reverse complement the read if needed		
		if (revcom == TRUE) {
			s <- dna.complement(s)
		}

		# convert T to U if the user requests it
		if (ttou == TRUE) {
			s <- gsub("T","U", toupper(s))
		}

		# split the sequence into a vector
		# iterate over each base and record counts
		# in the appropriate position using the idx
		# counter
		vec <- strsplit(s,split="")[[1]]
		idx <- 1
		for (v in vec) {
			lst[[v]][idx]=lst[[v]][idx]+1
			idx <- idx+1
		}
	}

	# create a matrix and convert
	# the counts from the list
	out <- matrix(nrow=4, ncol=maxlen)
	out[1,] <- lst[["A"]] 
	out[2,] <- lst[["C"]] 
	out[3,] <- lst[["G"]] 
	out[4,] <- lst[["T"]] 

	# provide appropriate row and column names
	rownames(out) <- c("A","C","G","T")
	colnames(out) <- 1:maxlen

	# vector to hold indices of columns with
	# zero counts
	skipcol <- vector(mode="numeric")

	# if we want to scale the data
	# into proportions (we generally do)
	if (scaled == TRUE) {

		# iterate over the columns
		for(j in 1:ncol(out)) {

			# if the col sums > 0 divide by the total
			# otherwise add it to the columns to be skipped
			if (sum( out[,j] ) > 0) {
				out[,j] <- out[,j] / sum( out[,j] )
			} else {
				skipcol <- append(skipcol, j)
			}
		}
	}

	# remove zero sum columns
	if (length(skipcol) > 0) {
		out <- out[,-skipcol]
	}
	
	# return the data
	return(out)

}


make.simple.consensus <- function(vdf=NULL, reflen=12000) {

	# create a vector for the consensus
	cons <- vector(mode="character", length=reflen)

	# create a list to count bases
	# at each position
	cl <- vector("list", reflen)

	# initialise the list with zeros
	for (i in 1:reflen) {
		cl[[i]]["A"]<-0
		cl[[i]]["G"]<-0
		cl[[i]]["C"]<-0
		cl[[i]]["T"]<-0
		cl[[i]]["N"]<-0
	}

	# iterate through the input data.frame
	for (i in 1:nrow(vdf)) {

		# store the row in r for
		# convenience
		r <- vdf[i,]

		# record the 5p position for convenience
		cpos <- r$pos

		# iterate over each base in the clipped sequence
		for (base in strsplit(toupper(r$clipseq), split="")[[1]]) {

			# record the base at position cpos
			# and move to the next position
			cl[[cpos]][base] <- cl[[cpos]][base] + 1
			cpos <- cpos + 1
		}
	}

	# iterate over the psoitions in the reference
	for (i in 1:reflen) {

		# if there are no bases, record an N
		if (sum(cl[[i]]) == 0) {
			cons[i] <- "N"
		} else {
			# otherwise sort and record the base that occurs
			# the most
			# NB does not deal with ties
			cons[i] <- names(cl[[i]])[order(cl[[i]])[5]]
		}
	}

	# return the consensus as a string	
	fas <- paste(cons, sep="", collapse="")
	return(fas)
}


position.barplot <- function(vdf=NULL, minlen=1, maxlen=37, reflen=10000, samp="", plot=TRUE, poscol="red", negcol="green") {


	# limit the input to only contain reads
	# of size between minlen and maxlen
	vdf <- vdf[vdf$cliplen>=minlen&vdf$cliplen<=maxlen,]

	# get the reference name and size range as strings
	refname = vdf$rname[1]
	maps <- paste(minlen, "-", maxlen, sep="")

	
	# get counts of reads at each position for 
	# negative strand
	npos <- .count.reads.by.position(vdf, "-")

	# get counts of reads at each position for 
	# positive strand
	ppos <- .count.reads.by.position(vdf, "+")

	# both of the above may "miss" positions out
	# due to zero counts so we create a dummy
	# data.frame with all positions in it
	# and outer-join each of npos and ppos to it
	# filling in resulting NS's with 0 counts
	lgths <- data.frame(position=1:reflen)

	lpos <- merge(lgths, ppos, all.x=TRUE, sort=TRUE)
        lneg <- merge(lgths, npos, all.x=TRUE, sort=TRUE)
      
        lpos[is.na(lpos)] <- 0
        lneg[is.na(lneg)] <- 0

	# sort each of the new merged data.frames
	# by position
	lpos <- lpos[order(lpos$position),]
	lneg <- lneg[order(lneg$position),]
      
	# if the users has decided to plot
	if (plot == TRUE) {

		# calculate the range to be plotted
       		mx <- max(c(lpos[,2],lneg[,2]))
        	rng <- c(-1*mx,mx)

		# create a title for the plot
		tit   <- paste(samp,"Distribution of",maps,"bp maps along",refname, sep=" ")

		# create the barplot as counts on the positive strand and 0-counts on the negative strand
		barplot(as.matrix(lpos[,2]), beside=TRUE, names=lpos[,1], col=c(poscol), xlab="Nucleotide position", ylab="Count", border=poscol, ylim=rng, xaxt="n", main=tit)
        	barplot(as.matrix(0-lneg[,2]), beside=TRUE, add=TRUE, col=c(negcol),  border=negcol)

		# add an axis
        	axis(side=1)
	}

	# merge the positive and negative count data.frames
	# and give them useful column names and return to the user
	ret <- merge(lpos, lneg, by="position", sort=FALSE)
	colnames(ret) <- c("position","poscount","negcount")
	return(ret)
}


.count.reads.by.position <- function(vdf=NULL, str=NULL) {

	# subset by strand
	svdf <- vdf[vdf$strand==str,]

	if (nrow(svdf) > 0) {
		# if there are any mappings on this strand
		# count the number of reads by position
		bn <- by(svdf$qname, svdf$pos, length)
		
		# create data.frame and return it
		ret <- data.frame(position=as.integer(names(bn)), count=as.vector(bn))
		return(ret)

	} else {
		# otherwise just return a dummy data.frame
		return(data.frame(position=1,poscount=0))
	}

}


pwm.heatmap <- function(pwm=NULL, col.fun=colorRampPalette(c("black","red"), space="rgb"), mar=c(3,2,1,1), cex.axis=0.8) {

	# set the margins
	par(mar=mar)

	# draw the image
	image(z=t(pwm), y=1:nrow(pwm), x=1:ncol(pwm), xaxt="n", yaxt="n", col=col.fun(10), xlab="",ylab="")

	# add axes
	axis(side=2, at=1:nrow(pwm),  labels=rownames(pwm), cex.axis=cex.axis)

	# add axes
	axis(side=1, at=1:ncol(pwm), labels=colnames(pwm), cex.axis=cex.axis)
}


read.bam <- function (bamfile = NULL, chr = NULL, start = 1, end = 1e+07, what = c("qname", "flag", "rname", "strand", "pos", "qwidth", "mapq", "cigar", "mrnm", "mpos", "isize", "seq"), tag = c("NM"), removeN = TRUE) {


    # read the data using scanBam from Rsamtools() package
    if (is.null(chr)) {

	# if no chromosome if given, read everything
	param <- ScanBamParam(what = what, tag = tag)
    	bam <- scanBam(bamfile, param = param)

    } else {
	# otherwise limit the data by chromosome and the
	# given start and end
    	which <- IRangesList(chr = IRanges(start, end))
    	names(which) <- chr
    	param <- ScanBamParam(which = which, what = what, tag = tag)
    	bam <- scanBam(bamfile, param = param)
    }

    # the result is in a rather difficult
    # to use S4 class/list and so we go through 
    # the following steps to coerce the object
    # to a native data.frame

    # get the names of the object (the columns/"what")
    # and apply lapply
    elts <- names(bam[[1]])
    lst <- lapply(elts, function(elt) .unlist(lapply(bam, "[[", elt)))
    names(lst) <- elts

    # coerce to DataFrame using the in-built Rsamtools function
    vdf <- do.call("DataFrame", lst)
    colnames(vdf) <- names(lst)

    # remove blanks
    vdf <- vdf[!is.na(vdf$qwidth), ]

    # convert important columns to character
    vdf$seq <- as.character(vdf$seq)
    vdf$cigar <- as.character(vdf$cigar)

    # if the user wants to remove sequences
    # that contain N's, remove them
    if (removeN == TRUE) {
        ns <- grep("N", vdf$seq)
        if (length(ns) > 0) {
            vdf <- vdf[-ns, ]
        }
    }

    # call the native as.data,frame function 
    # to create a native data.frame and return it
    vdf <- as.data.frame(vdf, stringsAsFactors = FALSE)
    return(vdf)
}



.unlist <- function (x) {
	# code provided by Martin Morgan (author of Rsamtools)
	# to assist in coercion of Rsamtools classes to data.frames
	## do.call(c, ...) coerces factor to integer, which is undesired
	x1 <- x[[1L]]
	if (is.factor(x1)) {
		structure(unlist(x), class = "factor", levels = levels(x1))
	} else {
		do.call(c, x)
	}
}

read.dist.plot <- function (sr = NULL, minlen = 1, maxlen = 37, method = "add", pad = 30, primary = "pos", plot=TRUE, title="5' read distance plot", xlab="Distance", ylab="Count") {

    # check the input paramters
    # check method argument is valid
    if (! method %in% c("pos", "neg", "mean", "multiply", "add", "min", "max")) {
        print("Method should be one of:")
        print(c("pos", "neg", "mean", "multiply", "add", "min", "max"))
	print(paste("You gave:", method))
        print("Setting method to mean")
        method = "mean"
    }

    # check primary is one of "pos" or "neg"
    if (! primary %in% c("pos", "neg")) {
        print("Primary should be one of:")
        print(c("pos", "neg"))
        print("Setting Primary to pos")
        pri = "pos"
    }

    # subset the input according to the
    # given size range
    psr <- sr[sr$len >= minlen & sr$len <= maxlen, ]

    # calculate the end position of each read
    psr$posend <- psr$pos + (nchar(psr$seq) - 1)

    # calculate the 5 prime and 3 prime ends
    # of each read
    psr$p5 <- psr$pos
    psr$p3 <- psr$pos
    psr$p5[psr$strand == "+"] <- psr$pos[psr$strand == "+"]
    psr$p3[psr$strand == "+"] <- psr$posend[psr$strand == "+"]
    psr$p3[psr$strand == "-"] <- psr$pos[psr$strand == "-"]
    psr$p5[psr$strand == "-"] <- psr$posend[psr$strand == "-"]

    # set the "primary" strand
    # it is this strand that we will iterate over
    # and then we will count overlaps on the opposite strand
    if (primary == "pos") {
        pri <- psr[psr$strand == "+", ]
        alt <- psr[psr$strand == "-", ]
    }   else {
        pri <- psr[psr$strand == "+", ]
        alt <- psr[psr$strand == "-", ]
    }

    # create an empty list for the output
    lst <- list()

    # for each entry in the sequence report
    # for the primary strand
    for (i in 1:nrow(pri)) {

	# r contains the row as a vector
        r <- pri[i, ]

        # set the beginning of the range in which we will
	# count reads on the opposite strand.  Set to 1
	# if it is <= 0
        begin <- r$p5 - pad
        if (begin <= 0) 
            begin <- 1

	# set the ending of the range
        ending <- r$p5 + pad

	# subset the alternative strand
	# according to the defined range
        sn <- alt[alt$p5 >= begin & alt$p5 <= ending, ]

	# proceed only if there are actually reads....
        if (nrow(sn) > 0) {

	    # cum up the counts for reads, by position, within the range
            ssn <- aggregate(sn$count, by = list(pos = sn$p5), sum)

	    # give the result some useful column names
            colnames(ssn) <- c("p5", "count")

	    # for every rown in the result
            for (j in 1:nrow(ssn)) {

		# e contains the 5p end of the data
                e <- ssn[j, ]$p5

		# cnt is the summed counts
                cnt <- ssn[j, ]$count

		# idx is the distance between the query read
		# and the read on the opposite strand
                idx <- as.character(e - r$p5)

		# initialise the output list
		# if no value has been set yet
                if (is.null(lst[[idx]])) 
                  lst[[idx]] <- 0

		# depending on which method has been chosen
		# record the result appropriately
                switch(method, 
		   mean = {
		  # add the average of counts on the primary and alternative strand
                  lst[[idx]] <- lst[[idx]] + (mean(c(cnt, r$count)))
                }, pos = {
		  # add just the count of reads on the primary strand
                  lst[[idx]] <- lst[[idx]] + r$count
                }, neg = {
		  # add just the count of reads on the alternate strand
                  lst[[idx]] <- lst[[idx]] + cnt
                }, multiply = {
		  # add the product of counts on the primary and alternate strands
                  lst[[idx]] <- lst[[idx]] + (cnt * r$count)
                }, add = {
		  # add the sum of counts on the primary and alternate strands
                  lst[[idx]] <- lst[[idx]] + sum(c(cnt,r$count))
                }, min = {
		  # add the min of counts on the primary and alternate strands
                  lst[[idx]] <- lst[[idx]] + min(c(cnt,r$count))
		}, max = {
		  # add the max of counts on the primary and alternate strands
                  lst[[idx]] <- lst[[idx]] + max(c(cnt,r$count))
		})
            }
        }
    }

    # unlist the result and create a data.frame output
    ul <- unlist(lst)
    du <- data.frame(loc = as.numeric(names(ul)), count = as.numeric(ul))

    # order by location
    du <- du[order(du$loc), ]

    # plot if the user wants it
    if (plot == TRUE) {

	plot(du$loc, du$count, type="l", main=title, xlab=xlab, ylab=ylab)

    }

    # return the result
    return(du)
}


sequence.report <- function(df=NULL, minlen=1, maxlen=37) {

	# filter the input according to the provided
	# size range
	mydf <- df[df$cliplen>=minlen&df$cliplen<=maxlen,]

	# use ddply to count the occurence of reads
	# grouped by reference (rname), position (pos), 
	# the actual sequence (clipseq), the sequencelength (cliplen)
	# and strand (strand)
	cts <- ddply(mydf, c("rname","pos","clipseq","cliplen","strand"), nrow)

	# order the result by position, strand, length and count
	cts <- cts[order(cts$pos, cts$strand, cts$cliplen, cts$V1),]

	# reorder the columns and rename them with sensible names
	cts <- cts[,c("rname","pos","strand","clipseq","cliplen","V1")]
	colnames(cts) <- c("ref","pos","strand","seq","len","count")

	# return
	return(cts)
}


size.position.heatmap <- function(dm=NULL, minlen=1, maxlen=37, start=1, end=1e+07, scale=TRUE, col.fun=colorRampPalette(c("black","red"), space="rgb"), log=FALSE, mar=c(5,4,4,2), main=NULL) {
	
	# filter input columns
	mr <- as.integer(rownames(dm))
	mc <- as.integer(colnames(dm))
	dm <- dm[,mc>=minlen&mc<=maxlen]
	dm <- dm[mr>=start&mr<=end,]

	# reset minlen and maxlen in case
	# they were inside the range anyway
	minlen <- min(colnames(dm))
	maxlen <- min(colnames(dm))


	if (log==TRUE) {
		dm <- log(dm+1)
	}

	# scale it if the user wants
	if (scale==TRUE) {
		dm <- t(scale(t(dm)))
		dm[is.na(dm)] <- min(dm[!is.na(dm)])
	}


	# get rownames as vector of integers
	r <- as.integer(rownames(dm))

	# heatmap of read lengths
	par(mar=mar)
	image(z=as.matrix(dm), col=col.fun(32), xaxt="n", yaxt="n", y=1:ncol(dm), x=min(r):max(r), xlab="", ylab="")
	axis(side=2, at=1:ncol(dm),  labels=colnames(dm))
	axis(side=1)
	if (! is.null(main)) {
		title(main)
	}

}



size.strand.bias.plot <- function(bp=NULL, minlen=1, maxlen=37, line.col="red", sym.axes=TRUE, title="Strand bias plot", xlab="+ strand counts", ylab="- strand counts", lty=1, lwd=2, cextxt=1, mar=c(5,4,4,1), tpos=1, ...) {

	# limit the data according to user input
	bp <- bp[bp$lgth>=minlen&bp$lgth<=maxlen,]

	# figure out the axes we will need
	# depending on whether we want
	# symmetrical X and Y axes
	if (sym.axes==TRUE) {
		miny <- min(bp[,c("pos","neg")])
		maxy <- max(bp[,c("pos","neg")])
		minx <- miny
		maxx <- maxy
	} else {
		miny <- min(bp[,c("neg")])
		maxy <- max(bp[,c("neg")])
		minx <- min(bp[,c("pos")])
		maxx <- max(bp[,c("pos")])
	}

	# basic scatter plot
	par(mar=mar)
	plot(bp$pos, bp$neg, main=title, xlab=xlab, ylab=ylab, xlim=c(minx,maxx), ylim=c(miny, maxy), ...)

	# add the line of y=x
	abline(0,1,col=line.col, lwd=lwd, lty=lty)

	# label the points
	text(bp$pos, bp$neg, bp$lgth, cex=cextxt, pos=tpos)


}


stacked.barplot <- function(dm=NULL, minlen=1, maxlen=37, start=1, end=1e+07, internal.margins=c(0,0,0,1), skip.x=2, bicol=NULL, col.fun=rainbow, axis.col="black", main.col="black", main.adj=1, samey=FALSE, ...) {

	# filter input columns
	mr <- as.integer(rownames(dm))
	mc <- as.integer(colnames(dm))
	dm <- dm[,mc>=minlen&mc<=maxlen]
	dm <- dm[mr>=start&mr<=end,]

	# reset minlen and maxlen in case
	# they were inside the range anyway
	minlen <- min(colnames(dm))
	maxlen <- min(colnames(dm))

	# figure out how many graphs we have
	ngraph <- ncol(dm)

	# warn if it is lots!
	if (ngraph>7) {
		print("WARNING: you have chosen to plot a lot of graphs to the current device")
		print("WARNING: this may be too many for the device to cope with")
		print("WARNING: if you get an error message, then consider using a bigger device")
		print("WARNING: with bigger dimensions: see ?png ?jpeg ?pdf")
	}

	# create ngraph rows in the device
	split.screen(c(ngraph,1))

	# set the colours
	if (is.null(bicol)) {
		cols <- col.fun(ncol(dm))
	} else {
		cols <- rep(bicol, ncol(dm))
	}

	# iterate over the number of graphs
	for (i in 1:ngraph) {


		maxy <- max(dm[,i])
		miny <- min(dm[,i])

		if (samey==TRUE) {
			maxy <- max(dm)
			miny <- min(dm)
		}

		# select the relevant screen
		screen(i)
	
		# set the margins inside each graph
		par(mar=internal.margins)

		# plot the graph
		plot(rownames(dm), dm[,i], col=cols[i], type="l", cex.axis=0.7, bty="n", xaxt="n", ylim=c(miny, maxy), ...)

		# only plot x-axis every skip.x graph
		if (i%%skip.x==0) {
			axis(side=1, cex.axis=0.7, col=axis.col, col.axis=axis.col)
		}

		# plot a title for each graph
		title(colnames(dm)[i], line=-1, cex.main=0.7, col.main=main.col, adj=main.adj)
	}

	# close the screens
	close.screen(all=TRUE)

}


summarise.by.length <- function(vdf=NULL, minlen=1, maxlen=37, start=1, end=1e+07, strand=NULL) {

	# sort out end which has been given
	# a silly high default number so as not to miss
	# anything
	if (end > max(vdf$pos)) {
		end <- max(vdf$pos)
	}

	# filter according to input
	vdf <- vdf[vdf$cliplen>=minlen&vdf$cliplen<=maxlen&vdf$pos>=start&vdf$pos<=end,]

	# summarise data for each length of read
	# count the occurrence at each position
	if (is.null(strand)) {
		bypos <- acast(vdf, pos~cliplen, length)
	} else if (strand=="pos") {
		bypos <- acast(vdf[vdf$strand=="+",], pos~cliplen, length)

	} else if (strand=="neg") {
		bypos <- acast(vdf[vdf$strand=="-",], pos~cliplen, length)
	} else {
		print("WARNING: don't understand your strand argument")
		print("WARNING: sending back data for both strands")
		bypos <- acast(vdf, pos~cliplen, length)
	}

	# in case of blanks, create dummy data.frame
	dummy <- data.frame(pos=start:end)

	# and merge it with the results from cast(...)
	dm <- merge(dummy, bypos, by.x="pos", by.y="row.names", all.x=TRUE, sort=FALSE)

	# deal with NAs which are zero counts
	dm[is.na(dm)] <- 0

	# order by position
	dm <- dm[order(dm$pos),]

	# give the result sensible rownames
	rownames(dm) <- dm$pos

	# remove superfluous column
	dm <- dm[,-1]

	# return data
	return(dm)

}