{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Simulation of data using Julia\n", "\n", "### Rohan L. Fernando\n", "\n", "### June 2015" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Julia Packages \n", "* [List of registered Julia packages](http://docs.julialang.org/en/release-0.1/packages/packagelist/#available-packages) \n", "* Will use [Distributions Package](http://distributionsjl.readthedocs.org/en) to simulate data. \n", "* It can be added to your system with the command:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "#Pkg.add(\"Distributions\")" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "* This needs to be done only once. \n", "\n", "* But, to access the functions in the Distributions package the \"using\" command has to be invoked as:\n" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": true, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "using Distributions" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Simulate matrix of ``genotype\" covariates" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "scrolled": true, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "10x5 Array{Int64,2}:\n", " 1 2 1 1 0\n", " 0 0 0 1 1\n", " 1 0 1 2 2\n", " 2 1 0 0 2\n", " 1 1 1 0 2\n", " 1 0 0 0 1\n", " 0 2 2 1 2\n", " 2 1 2 2 1\n", " 1 0 2 2 0\n", " 2 1 0 2 0" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nRows = 10\n", "nCols = 5\n", "X = sample([0,1,2],(nRows,nCols))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "Each element in $\\mathbf{X}$ is sampled from the array [0,1,2]. " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Other methods of the function ``sample\"" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "scrolled": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "7 methods for generic function sample:" ], "text/plain": [ "# 7 methods for generic function \"sample\":\n", "sample(a::AbstractArray{T,N}) at /Users/rohan/.julia/v0.3/StatsBase/src/sampling.jl:277\n", "sample{T}(a::AbstractArray{T,N},n::Integer) at /Users/rohan/.julia/v0.3/StatsBase/src/sampling.jl:320\n", "sample{T}(a::AbstractArray{T,N},dims::(Int64...,)) at /Users/rohan/.julia/v0.3/StatsBase/src/sampling.jl:324\n", "sample(wv::WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/rohan/.julia/v0.3/StatsBase/src/sampling.jl:335\n", "sample(a::AbstractArray{T,N},wv::WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/rohan/.julia/v0.3/StatsBase/src/sampling.jl:347\n", "sample{T}(a::AbstractArray{T,N},wv::WeightVec{W,Vec<:AbstractArray{T<:Real,1}},n::Integer) at /Users/rohan/.julia/v0.3/StatsBase/src/sampling.jl:529\n", "sample{T}(a::AbstractArray{T,N},wv::WeightVec{W,Vec<:AbstractArray{T<:Real,1}},dims::(Int64...,)) at /Users/rohan/.julia/v0.3/StatsBase/src/sampling.jl:532" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "methods(sample)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Add a column of ones for intercept" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "scrolled": true, "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { "text/plain": [ "10x6 Array{Float64,2}:\n", " 1.0 1.0 2.0 1.0 1.0 0.0\n", " 1.0 0.0 0.0 0.0 1.0 1.0\n", " 1.0 1.0 0.0 1.0 2.0 2.0\n", " 1.0 2.0 1.0 0.0 0.0 2.0\n", " 1.0 1.0 1.0 1.0 0.0 2.0\n", " 1.0 1.0 0.0 0.0 0.0 1.0\n", " 1.0 0.0 2.0 2.0 1.0 2.0\n", " 1.0 2.0 1.0 2.0 2.0 1.0\n", " 1.0 1.0 0.0 2.0 2.0 0.0\n", " 1.0 2.0 1.0 0.0 2.0 0.0" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = [ones(nRows,1) X]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Simulate effects from normal distribution" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "scrolled": false, "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { "text/plain": [ "6-element Array{Float64,1}:\n", " -0.588095 \n", " 0.00300013\n", " -0.490994 \n", " 0.211137 \n", " 0.838283 \n", " 0.259775 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nRowsX, nColsX = size(X)\n", "mean = 0.0\n", "std = 0.5\n", "b = rand(Normal(mean,std),nColsX)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "notes" } }, "source": [ "## Simulate phenotypic values" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "10-element Array{Float64,1}:\n", " 0.428195 \n", " 0.891387 \n", " 2.37782 \n", " 0.0377457\n", " -0.457258 \n", " -2.18597 \n", " -0.405765 \n", " 1.8095 \n", " 0.770982 \n", " 0.0214083" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resStd = 1.0\n", "y = X*b + rand(Normal(0,resStd),nRowsX)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Function to simulate data\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "simDat (generic function with 1 method)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "using Distributions\n", "function simDat(nObs,nLoci,bMean,bStd,resStd)\n", " X = [ones(nObs,1) sample([0,1,2],(nObs,nLoci))]\n", " b = rand(Normal(bMean,bStd),size(X,2))\n", " y = X*b + rand(Normal(0.0, resStd),nObs)\n", " return (y,X,b)\n", "end" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Use of function simDat to simulate data" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "6-element Array{Float64,1}:\n", " -0.0613726\n", " 0.0924577\n", " -0.276544 \n", " -0.439565 \n", " 0.462588 \n", " -0.0115201" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nObs = 10\n", "nLoci = 5\n", "bMean = 0.0\n", "bStd = 0.5\n", "resStd = 1.0\n", "res = simDat(nObs,nLoci,bMean,bStd,resStd)\n", "y = res[1]\n", "X = res[2]\n", "b = res[3]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "10x6 Array{Float64,2}:\n", " 1.0 2.0 1.0 1.0 1.0 2.0\n", " 1.0 2.0 1.0 1.0 0.0 0.0\n", " 1.0 1.0 2.0 2.0 1.0 0.0\n", " 1.0 2.0 2.0 1.0 2.0 0.0\n", " 1.0 2.0 1.0 0.0 2.0 0.0\n", " 1.0 1.0 0.0 2.0 0.0 0.0\n", " 1.0 0.0 1.0 0.0 2.0 0.0\n", " 1.0 2.0 1.0 1.0 0.0 0.0\n", " 1.0 2.0 0.0 0.0 0.0 1.0\n", " 1.0 2.0 1.0 0.0 2.0 0.0" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## XSim: Genome sampler\n", "\n", "* Simulate SNPs on chromosomes\n", "* Random mating in finite population to generate LD\n", "* Efficient algorithm for sampling sequence data\n", "* [XSim paper](http://g3journal.org/content/early/2015/05/07/g3.115.016683.full.pdf+html)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Install XSim" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "# installing package \n", "# only needs to be done once\n", "#Pkg.clone(\"https://github.com/reworkhow/XSim.jl.git\")" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Initialize sampler" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "using XSim\n", "chrLength = 1.0\n", "numChr = 1\n", "numLoci = 2000\n", "mutRate = 0.0\n", "locusInt = chrLength/numLoci\n", "mapPos = [0:locusInt:(chrLength-0.0001)]\n", "geneFreq = fill(0.5,numLoci)\n", "XSim.init(numChr,numLoci,chrLength,geneFreq,mapPos,mutRate) " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Simulate random mating in finite population" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sampling 500 animals into base population.\n", "Sampling 500 animals into generation: 1\n", "Sampling 500 animals into generation: 2\n", "Sampling 500 animals into generation: 3\n", "Sampling 500 animals into generation: 4\n", "Sampling 500 animals into generation: 5\n", "Sampling 500 animals into generation: 6\n", "Sampling 500 animals into generation: 7\n", "Sampling 500 animals into generation: 8\n", "Sampling 500 animals into generation: 9\n", "Sampling 500 animals into generation: 10\n", "Sampling 500 animals into generation: 11\n", "Sampling 500 animals into generation: 12\n", "Sampling 500 animals into generation: 13\n", "Sampling 500 animals into generation: 14\n", "Sampling 500 animals into generation: 15\n", "Sampling 500 animals into generation: 16\n", "Sampling 500 animals into generation: 17\n", "Sampling 500 animals into generation: 18\n", "Sampling 500 animals into generation: 19\n" ] } ], "source": [ "pop = startPop()\n", "nGen = 20\n", "popSize = 500\n", "pop.popSample(nGen,popSize)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Get genotypes" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "500x2000 Array{Int64,2}:\n", " 2 0 2 1 2 1 2 2 2 1 1 2 1 … 0 2 0 1 1 1 1 1 2 2 1 1\n", " 1 2 1 0 1 2 2 1 1 2 0 2 2 0 1 0 1 2 2 1 2 0 1 0 0\n", " 1 0 1 1 0 1 1 1 0 2 2 1 1 2 1 0 2 2 2 1 2 0 1 1 2\n", " 1 1 0 0 2 0 2 1 1 2 0 1 1 0 2 1 2 2 1 1 1 2 2 1 1\n", " 1 0 0 0 1 1 1 1 2 1 1 0 1 2 0 0 1 0 1 1 1 1 1 0 2\n", " 2 1 2 2 2 1 2 1 1 1 0 1 2 … 1 2 1 1 0 2 0 2 1 0 0 0\n", " 2 2 1 1 0 2 1 2 1 1 1 0 0 1 2 2 1 2 0 2 2 2 1 2 1\n", " 1 1 1 1 0 2 2 1 1 2 0 1 2 2 1 2 0 0 0 1 0 1 1 1 1\n", " 2 1 0 1 1 1 1 1 2 1 0 2 0 1 1 1 1 1 1 1 1 0 0 1 1\n", " 1 1 2 2 2 0 2 1 1 2 1 1 1 1 0 2 2 1 1 1 1 2 2 1 0\n", " 1 0 1 0 1 2 2 1 2 1 1 1 2 … 2 2 1 0 1 0 0 1 0 1 2 1\n", " 0 2 1 1 0 1 2 1 2 2 1 2 2 1 1 2 1 1 2 2 0 0 1 1 2\n", " 1 0 2 2 2 1 1 1 1 1 0 2 2 1 1 2 1 2 1 1 0 1 1 1 0\n", " ⋮ ⋮ ⋮ ⋱ ⋮ ⋮ \n", " 0 0 0 0 0 2 2 0 2 2 2 1 2 1 0 1 2 0 1 2 1 2 2 1 1\n", " 0 0 1 2 0 2 2 2 1 1 0 0 1 1 2 2 2 1 1 1 1 0 2 1 0\n", " 2 1 0 1 1 1 1 1 2 1 0 2 0 … 1 2 2 0 2 1 1 2 1 1 0 1\n", " 2 1 1 2 1 1 0 2 1 1 2 0 0 2 2 1 2 0 1 1 2 0 0 0 1\n", " 0 0 2 1 0 1 1 0 0 2 1 2 0 1 2 0 1 2 1 1 1 1 1 1 1\n", " 1 1 1 1 0 2 1 2 1 0 1 1 0 1 1 1 0 2 1 1 1 0 1 1 0\n", " 1 0 2 1 2 1 2 2 2 2 1 2 1 2 1 1 1 1 0 1 1 1 0 1 1\n", " 1 1 2 1 2 1 2 2 1 2 1 2 1 … 1 0 1 1 2 2 1 1 0 0 2 1\n", " 2 2 1 1 1 1 1 1 1 0 2 1 2 1 2 1 1 0 1 0 2 1 0 1 0\n", " 0 0 1 1 1 1 1 1 2 1 0 1 1 1 2 1 1 1 1 1 1 2 1 0 1\n", " 2 1 1 1 2 0 0 1 1 1 1 1 1 0 0 2 0 2 2 0 0 1 1 1 1\n", " 2 0 2 2 2 0 2 1 1 2 1 2 1 2 2 0 0 1 0 1 0 0 0 2 1" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "M = pop.getGenotypes()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Randomly sample QTL positions " ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "50-element Array{Int64,1}:\n", " 365\n", " 1504\n", " 705\n", " 1777\n", " 286\n", " 1210\n", " 518\n", " 1834\n", " 848\n", " 1333\n", " 1870\n", " 70\n", " 1244\n", " ⋮\n", " 723\n", " 1140\n", " 857\n", " 722\n", " 643\n", " 1713\n", " 1937\n", " 1648\n", " 38\n", " 180\n", " 1404\n", " 1068" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k = size(M,2)\n", "nQTL = 50\n", "QTLPos = sample([1:k],nQTL,replace=false)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Marker positions" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1950-element Array{Int64,1}:\n", " 1\n", " 2\n", " 3\n", " 4\n", " 5\n", " 6\n", " 7\n", " 8\n", " 9\n", " 10\n", " 11\n", " 12\n", " 13\n", " ⋮\n", " 1989\n", " 1990\n", " 1991\n", " 1992\n", " 1993\n", " 1994\n", " 1995\n", " 1996\n", " 1997\n", " 1998\n", " 1999\n", " 2000" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mrkPos = deleteat!([1:k],sort(QTLPos))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## QTL and marker matrices" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "500x50 Array{Int64,2}:\n", " 0 2 0 0 1 0 2 1 1 1 2 1 0 … 1 1 0 1 1 1 2 0 2 1 1 1\n", " 1 2 0 0 0 1 1 1 0 2 0 2 1 1 0 1 0 1 1 1 0 0 0 1 1\n", " 1 2 0 0 1 2 0 2 2 0 0 2 1 2 1 1 1 2 1 2 2 1 1 1 1\n", " 0 0 0 0 0 1 2 2 1 2 2 2 0 0 1 2 1 2 1 1 0 1 0 0 0\n", " 1 1 1 0 0 2 2 1 0 1 1 1 0 1 1 1 1 1 0 1 2 0 0 2 0\n", " 1 1 0 1 1 2 1 2 2 1 1 1 1 … 0 0 2 0 1 1 1 2 1 0 1 1\n", " 1 2 2 1 1 1 2 2 1 2 1 2 1 1 0 1 0 0 0 1 1 0 0 0 0\n", " 2 1 1 2 0 1 0 2 1 2 1 2 1 1 1 1 1 0 1 0 0 0 0 2 1\n", " 1 1 1 2 1 1 2 1 2 1 1 2 1 0 2 1 0 1 1 1 0 1 1 2 2\n", " 2 0 0 0 0 1 2 2 1 0 1 2 2 1 2 0 0 2 1 2 1 0 0 1 1\n", " 1 0 1 2 1 0 1 2 1 2 1 1 2 … 1 0 0 1 0 1 0 1 0 1 0 2\n", " 1 1 0 0 0 2 2 0 1 1 2 1 1 2 1 1 0 2 2 1 0 0 0 1 1\n", " 0 0 0 1 2 0 1 0 2 1 2 0 2 2 1 1 1 1 2 0 1 0 0 2 1\n", " ⋮ ⋮ ⋮ ⋱ ⋮ ⋮ \n", " 0 0 1 1 0 2 2 1 1 0 2 2 0 2 2 1 0 1 1 1 2 0 1 1 1\n", " 2 1 1 0 1 1 0 1 1 1 1 2 2 2 0 1 2 1 1 2 0 0 0 1 2\n", " 0 0 2 1 1 0 2 2 1 1 2 2 2 … 1 1 0 1 1 0 0 2 1 1 2 2\n", " 1 2 1 0 1 2 2 1 2 1 0 0 0 1 0 0 2 0 0 1 1 0 2 0 2\n", " 0 1 0 2 2 2 0 1 2 0 2 2 1 1 2 2 0 2 0 2 1 1 1 1 0\n", " 1 0 1 1 0 2 0 1 0 1 1 1 1 1 2 1 0 1 2 1 0 2 0 0 0\n", " 1 1 1 1 0 1 1 0 1 0 2 2 2 1 1 1 1 1 2 1 0 0 0 0 1\n", " 1 2 0 1 1 1 2 1 1 1 0 1 0 … 1 2 0 0 1 0 1 0 0 1 1 1\n", " 2 2 1 2 2 2 1 1 0 2 2 1 0 1 1 0 2 1 1 2 1 0 0 1 1\n", " 0 2 0 1 0 2 1 1 1 2 2 2 0 1 1 1 1 2 1 1 1 0 0 2 1\n", " 0 2 2 1 2 2 2 0 2 0 1 2 2 0 1 0 0 2 1 1 0 0 1 1 2\n", " 0 1 0 1 0 2 0 1 1 1 0 1 1 1 0 1 2 0 2 1 1 0 1 1 0" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Q = M[:,QTLPos]" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "500x1950 Array{Int64,2}:\n", " 2 0 2 1 2 1 2 2 2 1 1 2 1 … 0 2 0 1 1 1 1 1 2 2 1 1\n", " 1 2 1 0 1 2 2 1 1 2 0 2 2 0 1 0 1 2 2 1 2 0 1 0 0\n", " 1 0 1 1 0 1 1 1 0 2 2 1 1 2 1 0 2 2 2 1 2 0 1 1 2\n", " 1 1 0 0 2 0 2 1 1 2 0 1 1 0 2 1 2 2 1 1 1 2 2 1 1\n", " 1 0 0 0 1 1 1 1 2 1 1 0 1 2 0 0 1 0 1 1 1 1 1 0 2\n", " 2 1 2 2 2 1 2 1 1 1 0 1 2 … 1 2 1 1 0 2 0 2 1 0 0 0\n", " 2 2 1 1 0 2 1 2 1 1 1 0 0 1 2 2 1 2 0 2 2 2 1 2 1\n", " 1 1 1 1 0 2 2 1 1 2 0 1 2 2 1 2 0 0 0 1 0 1 1 1 1\n", " 2 1 0 1 1 1 1 1 2 1 0 2 0 1 1 1 1 1 1 1 1 0 0 1 1\n", " 1 1 2 2 2 0 2 1 1 2 1 1 1 1 0 2 2 1 1 1 1 2 2 1 0\n", " 1 0 1 0 1 2 2 1 2 1 1 1 2 … 2 2 1 0 1 0 0 1 0 1 2 1\n", " 0 2 1 1 0 1 2 1 2 2 1 2 2 1 1 2 1 1 2 2 0 0 1 1 2\n", " 1 0 2 2 2 1 1 1 1 1 0 2 2 1 1 2 1 2 1 1 0 1 1 1 0\n", " ⋮ ⋮ ⋮ ⋱ ⋮ ⋮ \n", " 0 0 0 0 0 2 2 0 2 2 2 1 2 1 0 1 2 0 1 2 1 2 2 1 1\n", " 0 0 1 2 0 2 2 2 1 1 0 0 1 1 2 2 2 1 1 1 1 0 2 1 0\n", " 2 1 0 1 1 1 1 1 2 1 0 2 0 … 1 2 2 0 2 1 1 2 1 1 0 1\n", " 2 1 1 2 1 1 0 2 1 1 2 0 0 2 2 1 2 0 1 1 2 0 0 0 1\n", " 0 0 2 1 0 1 1 0 0 2 1 2 0 1 2 0 1 2 1 1 1 1 1 1 1\n", " 1 1 1 1 0 2 1 2 1 0 1 1 0 1 1 1 0 2 1 1 1 0 1 1 0\n", " 1 0 2 1 2 1 2 2 2 2 1 2 1 2 1 1 1 1 0 1 1 1 0 1 1\n", " 1 1 2 1 2 1 2 2 1 2 1 2 1 … 1 0 1 1 2 2 1 1 0 0 2 1\n", " 2 2 1 1 1 1 1 1 1 0 2 1 2 1 2 1 1 0 1 0 2 1 0 1 0\n", " 0 0 1 1 1 1 1 1 2 1 0 1 1 1 2 1 1 1 1 1 1 2 1 0 1\n", " 2 1 1 1 2 0 0 1 1 1 1 1 1 0 0 2 0 2 2 0 0 1 1 1 1\n", " 2 0 2 2 2 0 2 1 1 2 1 2 1 2 2 0 0 1 0 1 0 0 0 2 1" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = M[:,mrkPos]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Simulation of breeding values and phenotypic values" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Warning: imported binding for ans overwritten in module Main\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "genetic variance = 25.0\n", "genetic variance = 25.00 " ] } ], "source": [ "nQTL = size(Q,2)\n", "nObs = size(Q,1)\n", "α = rand(Normal(0,1),nQTL)\n", "a = Q*α\n", "# scaling breeding values to have variance 25.0\n", "v = var(a)\n", "genVar = 25.0\n", "a *= sqrt(genVar/v)\n", "ans = var(a)\n", "println(\"genetic variance = \", ans)\n", "# formatted printing\n", "@printf \"genetic variance = %8.2f \" ans" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "phenotypic mean = 98.77 \n", "phenotypic variance = 102.91 \n" ] } ], "source": [ "resVar = 75.0\n", "resStd = sqrt(resVar)\n", "e = rand(Normal(0,resStd),nObs)\n", "y = 100 + a + e\n", "@printf \"phenotypic mean = %8.2f \\n\" Base.mean(y)\n", "@printf \"phenotypic variance = %8.2f \\n\" var(y)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Simulate Crossbreeding using XSim" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "scrolled": true, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sampling 100 animals into generation: 1\n", "Sampling 100 animals into generation: 1\n" ] }, { "data": { "text/plain": [ "100x2000 Array{Int64,2}:\n", " 0 1 1 1 1 1 2 1 1 2 2 1 2 … 2 1 0 1 1 1 1 2 0 1 1 1\n", " 1 0 0 0 0 1 2 1 2 2 2 0 1 2 1 2 0 2 1 2 1 1 1 2 2\n", " 1 0 1 1 1 2 2 0 1 1 0 2 0 1 1 0 1 0 1 0 1 1 1 0 1\n", " 1 1 1 1 1 1 1 1 1 1 2 2 2 1 2 1 1 1 0 2 1 1 1 2 1\n", " 0 0 1 1 1 1 2 1 2 2 0 0 1 1 2 2 1 2 0 2 2 2 1 2 1\n", " 0 1 1 2 2 1 0 2 1 0 0 1 1 … 0 0 2 1 2 2 0 0 1 1 0 1\n", " 2 2 0 1 1 2 1 2 1 0 2 0 1 0 2 2 1 1 0 1 1 1 0 1 0\n", " 2 1 0 1 1 2 2 2 1 0 1 1 1 0 1 2 2 2 1 1 1 2 1 1 1\n", " 1 0 2 2 2 1 1 2 0 1 1 0 1 1 1 1 0 1 0 1 2 1 0 1 1\n", " 2 1 1 1 1 0 1 2 1 1 2 1 1 1 1 0 1 1 1 0 2 0 1 0 1\n", " 1 0 0 1 1 2 2 1 1 1 1 1 1 … 1 2 1 2 0 1 0 1 0 0 0 0\n", " 1 2 0 1 1 1 2 2 0 1 1 0 2 2 2 2 1 1 0 2 0 2 1 2 1\n", " 1 2 1 0 1 1 1 1 1 1 1 2 2 0 0 2 0 2 2 0 0 0 2 0 0\n", " ⋮ ⋮ ⋮ ⋱ ⋮ ⋮ \n", " 0 1 2 0 0 1 0 1 1 1 1 0 1 2 1 2 1 2 1 2 0 1 1 2 0\n", " 1 0 2 2 2 1 1 1 1 1 0 2 2 1 0 2 0 1 1 0 0 0 1 0 0\n", " 1 1 0 0 2 1 0 1 2 1 0 1 1 … 1 2 1 0 1 1 2 1 1 1 1 2\n", " 2 1 1 2 2 1 1 2 1 2 2 1 0 1 2 1 0 0 0 1 0 1 0 2 1\n", " 1 1 0 0 2 0 2 2 1 1 0 2 2 1 1 0 0 1 0 0 1 0 0 1 0\n", " 2 0 2 2 1 2 1 1 0 1 2 2 1 2 1 0 1 1 1 1 0 1 2 1 1\n", " 1 1 1 0 1 0 0 1 1 2 1 2 1 2 1 0 1 2 1 1 1 0 1 2 1\n", " 2 1 1 1 2 1 0 1 1 2 1 1 1 … 0 1 2 0 2 2 1 1 2 2 0 2\n", " 1 2 1 1 2 0 1 1 1 2 1 1 0 2 2 1 1 2 1 1 0 0 1 2 1\n", " 1 1 0 2 0 1 2 1 1 2 2 1 2 0 0 0 1 1 0 1 2 0 1 0 1\n", " 1 0 0 2 1 2 2 1 1 1 1 1 0 1 1 2 1 2 1 1 1 2 0 1 2\n", " 0 0 1 1 0 2 2 1 2 2 1 0 2 1 1 1 2 1 2 0 1 1 0 1 1" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "popA = pop.popNew(100)\n", "popB = pop.popNew(100)\n", "popAB = XSim.popCross(100,popA,popB)\n", "MAB = popAB.getGenotypes()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Calculate and plot gene frequencies " ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "freq=Base.mean(M,1)/2;" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " Gene Frequency\n", " \n", " \n", " 0.0\n", " 0.2\n", " 0.4\n", " 0.6\n", " 0.8\n", " 1.0\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 0\n", " 10\n", " 20\n", " 30\n", " 40\n", " 50\n", " 60\n", " \n", " \n", " Frequency\n", " \n", " \n", " Distribution of Gene Frequency\n", " \n", "\n", "\n", "\n", " \n", "\n", "\n" ], "text/html": [ "\n", "\n", "\n", " \n", " Gene Frequency\n", " \n", " \n", " -1.2\n", " -1.0\n", " -0.8\n", " -0.6\n", " -0.4\n", " -0.2\n", " 0.0\n", " 0.2\n", " 0.4\n", " 0.6\n", " 0.8\n", " 1.0\n", " 1.2\n", " 1.4\n", " 1.6\n", " 1.8\n", " 2.0\n", " 2.2\n", " -1.00\n", " -0.95\n", " -0.90\n", " -0.85\n", " -0.80\n", " -0.75\n", " -0.70\n", " -0.65\n", " -0.60\n", " -0.55\n", " -0.50\n", " -0.45\n", " -0.40\n", " -0.35\n", " -0.30\n", " -0.25\n", " -0.20\n", " -0.15\n", " -0.10\n", " -0.05\n", " 0.00\n", " 0.05\n", " 0.10\n", " 0.15\n", " 0.20\n", " 0.25\n", " 0.30\n", " 0.35\n", " 0.40\n", " 0.45\n", " 0.50\n", " 0.55\n", " 0.60\n", " 0.65\n", " 0.70\n", " 0.75\n", " 0.80\n", " 0.85\n", " 0.90\n", " 0.95\n", " 1.00\n", " 1.05\n", " 1.10\n", " 1.15\n", " 1.20\n", " 1.25\n", " 1.30\n", " 1.35\n", " 1.40\n", " 1.45\n", " 1.50\n", " 1.55\n", " 1.60\n", " 1.65\n", " 1.70\n", " 1.75\n", " 1.80\n", " 1.85\n", " 1.90\n", " 1.95\n", " 2.00\n", " -1\n", " 0\n", " 1\n", " 2\n", " -1.0\n", " -0.9\n", " -0.8\n", " -0.7\n", " -0.6\n", " -0.5\n", " -0.4\n", " -0.3\n", " -0.2\n", " -0.1\n", " 0.0\n", " 0.1\n", " 0.2\n", " 0.3\n", " 0.4\n", " 0.5\n", " 0.6\n", " 0.7\n", " 0.8\n", " 0.9\n", " 1.0\n", " 1.1\n", " 1.2\n", " 1.3\n", " 1.4\n", " 1.5\n", " 1.6\n", " 1.7\n", " 1.8\n", " 1.9\n", " 2.0\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " -70\n", " -60\n", " -50\n", " -40\n", " -30\n", " -20\n", " -10\n", " 0\n", " 10\n", " 20\n", " 30\n", " 40\n", " 50\n", " 60\n", " 70\n", " 80\n", " 90\n", " 100\n", " 110\n", " 120\n", " 130\n", " -60\n", " -58\n", " -56\n", " -54\n", " -52\n", " -50\n", " -48\n", " -46\n", " -44\n", " -42\n", " -40\n", " -38\n", " -36\n", " -34\n", " -32\n", " -30\n", " -28\n", " -26\n", " -24\n", " -22\n", " -20\n", " -18\n", " -16\n", " -14\n", " -12\n", " -10\n", " -8\n", " -6\n", " -4\n", " -2\n", " 0\n", " 2\n", " 4\n", " 6\n", " 8\n", " 10\n", " 12\n", " 14\n", " 16\n", " 18\n", " 20\n", " 22\n", " 24\n", " 26\n", " 28\n", " 30\n", " 32\n", " 34\n", " 36\n", " 38\n", " 40\n", " 42\n", " 44\n", " 46\n", " 48\n", " 50\n", " 52\n", " 54\n", " 56\n", " 58\n", " 60\n", " 62\n", " 64\n", " 66\n", " 68\n", " 70\n", " 72\n", " 74\n", " 76\n", " 78\n", " 80\n", " 82\n", " 84\n", " 86\n", " 88\n", " 90\n", " 92\n", " 94\n", " 96\n", " 98\n", " 100\n", " 102\n", " 104\n", " 106\n", " 108\n", " 110\n", " 112\n", " 114\n", " 116\n", " 118\n", " 120\n", " -100\n", " 0\n", " 100\n", " 200\n", " -60\n", " -55\n", " -50\n", " -45\n", " -40\n", " -35\n", " -30\n", " -25\n", " -20\n", " -15\n", " -10\n", " -5\n", " 0\n", " 5\n", " 10\n", " 15\n", " 20\n", " 25\n", " 30\n", " 35\n", " 40\n", " 45\n", " 50\n", " 55\n", " 60\n", " 65\n", " 70\n", " 75\n", " 80\n", " 85\n", " 90\n", " 95\n", " 100\n", " 105\n", " 110\n", " 115\n", " 120\n", " \n", " \n", " Frequency\n", " \n", " \n", " Distribution of Gene Frequency\n", " \n", "\n", "\n", "\n", " \n", "\n", "\n", "\n", "\n" ], "text/plain": [ "Plot(...)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "using Gadfly\n", "plot(x=freq,Geom.histogram,\n", "Guide.title(\"Distribution of Gene Frequency\"),\n", "Guide.ylabel(\"Frequency\"),\n", "Guide.xlabel(\"Gene Frequency\"))" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false, "scrolled": true, "slideshow": { "slide_type": "skip" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[NbConvertApp] Using existing profile dir: u'/Users/rohan/.ipython/profile_default'\n", "[NbConvertApp] Converting notebook wrkShpSlides1.ipynb to slides\n", "[NbConvertApp] Support files will be in wrkShpSlides1_files/\n", "[NbConvertApp] Loaded template slides_reveal.tpl\n", "[NbConvertApp] Writing 416961 bytes to wrkShpSlides1.slides.html\n" ] } ], "source": [ ";ipython nbconvert --to slides wrkShpSlides1.ipynb" ] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Julia 0.3.7", "language": "julia", "name": "julia 0.3" }, "language_info": { "name": "julia", "version": "0.3.7" } }, "nbformat": 4, "nbformat_minor": 0 }