{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Simulation of data using Julia\n", "\n", "### Rohan L. Fernando\n", "\n", "### November 2015" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Julia Packages \n", "* [List of registered Julia packages](http://pkg.julialang.org) \n", "* Will use [Distributions Package](http://distributionsjl.readthedocs.org/en) to simulate data. \n", "* It can be added to your system with the command:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "#Pkg.add(\"Distributions\")" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "* This needs to be done only once. \n", "\n", "* But, to access the functions in the Distributions package the \"using\" command has to be invoked as:\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "using Distributions" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Simulate matrix of ``genotype\" covariates" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false, "scrolled": true, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "10x5 Array{Int64,2}:\n", " 0 0 2 0 1\n", " 2 1 1 0 0\n", " 0 0 2 2 0\n", " 0 2 1 2 0\n", " 0 2 2 1 2\n", " 2 1 2 2 2\n", " 1 2 0 1 1\n", " 1 0 2 1 2\n", " 2 0 0 2 0\n", " 2 2 2 0 2" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nRows = 10\n", "nCols = 5\n", "X = sample([0;1;2],(nRows,nCols))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "Each element in $\\mathbf{X}$ is sampled from the array [0,1,2]. " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Other methods of the function ``sample\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "scrolled": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "7 methods for generic function sample:" ], "text/plain": [ "# 7 methods for generic function \"sample\":\n", "sample(a::AbstractArray{T,N}) at /Users/rohan/.julia/v0.4/StatsBase/src/sampling.jl:277\n", "sample{T}(a::AbstractArray{T,N}, n::Integer) at /Users/rohan/.julia/v0.4/StatsBase/src/sampling.jl:320\n", "sample{T}(a::AbstractArray{T,N}, dims::Tuple{Vararg{Int64}}) at /Users/rohan/.julia/v0.4/StatsBase/src/sampling.jl:324\n", "sample(wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/rohan/.julia/v0.4/StatsBase/src/sampling.jl:335\n", "sample(a::AbstractArray{T,N}, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/rohan/.julia/v0.4/StatsBase/src/sampling.jl:347\n", "sample{T}(a::AbstractArray{T,N}, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}, n::Integer) at /Users/rohan/.julia/v0.4/StatsBase/src/sampling.jl:529\n", "sample{T}(a::AbstractArray{T,N}, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}, dims::Tuple{Vararg{Int64}}) at /Users/rohan/.julia/v0.4/StatsBase/src/sampling.jl:532" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "methods(sample)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Add a column of ones for intercept" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false, "scrolled": false, "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { "text/plain": [ "10x6 Array{Float64,2}:\n", " 1.0 0.0 0.0 2.0 0.0 1.0\n", " 1.0 2.0 1.0 1.0 0.0 0.0\n", " 1.0 0.0 0.0 2.0 2.0 0.0\n", " 1.0 0.0 2.0 1.0 2.0 0.0\n", " 1.0 0.0 2.0 2.0 1.0 2.0\n", " 1.0 2.0 1.0 2.0 2.0 2.0\n", " 1.0 1.0 2.0 0.0 1.0 1.0\n", " 1.0 1.0 0.0 2.0 1.0 2.0\n", " 1.0 2.0 0.0 0.0 2.0 0.0\n", " 1.0 2.0 2.0 2.0 0.0 2.0" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = [ones(nRows,1) X]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Simulate effects from normal distribution" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "scrolled": false, "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { "text/plain": [ "6-element Array{Float64,1}:\n", " -0.784395\n", " 0.87821 \n", " 0.714257\n", " -0.439881\n", " 0.30109 \n", " -0.834222" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nRowsX, nColsX = size(X)\n", "mean = 0.0\n", "std = 0.5\n", "b = rand(Normal(mean,std),nColsX)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "notes" } }, "source": [ "## Simulate phenotypic values" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "10-element Array{Float64,1}:\n", " -2.85294 \n", " 1.95599 \n", " -1.92352 \n", " 0.568759\n", " -1.33026 \n", " 1.00019 \n", " 0.442988\n", " -4.11003 \n", " 0.512736\n", " 0.897029" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resStd = 1.0\n", "y = X*b + rand(Normal(0,resStd),nRowsX)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Function to simulate data\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "simDat (generic function with 1 method)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "using Distributions\n", "function simDat(nObs,nLoci,bMean,bStd,resStd)\n", " X = [ones(nObs,1) sample([0;1;2],(nObs,nLoci))]\n", " b = rand(Normal(bMean,bStd),size(X,2))\n", " y = X*b + rand(Normal(0.0, resStd),nObs)\n", " return (y,X,b)\n", "end" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Use of function simDat to simulate data" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "6-element Array{Float64,1}:\n", " 0.347378 \n", " 0.234197 \n", " -0.131814 \n", " -0.845628 \n", " 0.0233665\n", " 0.123051 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nObs = 10\n", "nLoci = 5\n", "bMean = 0.0\n", "bStd = 0.5\n", "resStd = 1.0\n", "res = simDat(nObs,nLoci,bMean,bStd,resStd)\n", "y = res[1]\n", "X = res[2]\n", "b = res[3]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## XSim: Genome sampler\n", "\n", "* Simulate SNPs on chromosomes\n", "* Random mating in finite population to generate LD\n", "* Efficient algorithm for sampling sequence data\n", "* [XSim paper](http://g3journal.org/content/early/2015/05/07/g3.115.016683.full.pdf+html)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Install XSim" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "# installing package \n", "# only needs to be done once\n", "#Pkg.clone(\"https://github.com/reworkhow/XSim.jl.git\")" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Initialize sampler" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "using XSim\n", "chrLength = 1.0\n", "numChr = 1\n", "numLoci = 2000\n", "mutRate = 0.0\n", "locusInt = chrLength/numLoci\n", "mapPos = collect(0:locusInt:(chrLength-0.0001))\n", "geneFreq = fill(0.5,numLoci)\n", "XSim.init(numChr,numLoci,chrLength,geneFreq,mapPos,mutRate) " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Simulate random mating in finite population" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sample Founders" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sampling 500 animals into base population.\n", "Sampling 500 animals into base population.\n" ] } ], "source": [ "popSizeFounder = 500\n", "sires = sampleFounders(popSizeFounder)\n", "dams = sampleFounders(popSizeFounder)\n", "nothing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Random Mating " ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generation 2: sampling 250 males and 250 females\n", "Generation 3: sampling 250 males and 250 females\n", "Generation 4: sampling 250 males and 250 females\n", "Generation 5: sampling 250 males and 250 females\n", "Generation 6: sampling 250 males and 250 females\n", "Generation 7: sampling 250 males and 250 females\n", "Generation 8: sampling 250 males and 250 females\n", "Generation 9: sampling 250 males and 250 females\n", "Generation 10: sampling 250 males and 250 females\n", "Generation 11: sampling 250 males and 250 females\n", "Generation 12: sampling 250 males and 250 females\n", "Generation 13: sampling 250 males and 250 females\n", "Generation 14: sampling 250 males and 250 females\n", "Generation 15: sampling 250 males and 250 females\n", "Generation 16: sampling 250 males and 250 females\n", "Generation 17: sampling 250 males and 250 females\n", "Generation 18: sampling 250 males and 250 females\n", "Generation 19: sampling 250 males and 250 females\n", "Generation 20: sampling 250 males and 250 females\n", "Generation 21: sampling 250 males and 250 females\n" ] } ], "source": [ "ngen,popSize = 20,500\n", "sires1,dams1,gen1 = sampleRan(popSize, ngen, sires, dams);" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Get genotypes" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "500x2000 Array{Int64,2}:\n", " 0 1 1 0 2 1 1 1 1 1 1 1 0 … 2 2 2 1 1 0 1 1 2 1 1 2\n", " 1 1 2 0 1 1 1 0 1 2 0 0 1 0 1 0 1 2 0 1 2 2 1 2 1\n", " 2 1 0 2 1 1 1 2 1 1 1 1 0 1 0 2 1 0 1 1 2 1 2 0 0\n", " 1 1 1 1 1 2 1 1 2 2 2 1 2 0 0 1 2 2 0 1 2 1 1 2 1\n", " 0 0 1 2 0 1 0 1 1 0 1 1 0 1 2 2 2 2 2 0 1 0 1 1 2\n", " 0 0 1 0 0 1 0 2 2 2 0 1 1 … 0 2 0 1 1 1 0 2 1 0 2 1\n", " 2 0 0 2 1 1 1 1 1 2 0 0 1 1 1 0 2 0 2 0 1 0 0 1 0\n", " 1 2 1 2 1 1 1 1 1 0 1 0 1 1 1 1 0 2 1 1 2 2 1 2 2\n", " 2 1 1 1 2 1 1 2 1 0 1 2 1 2 1 0 1 1 1 0 1 1 2 1 1\n", " 1 0 2 2 1 1 1 1 2 0 1 2 0 0 1 1 1 2 1 1 2 1 1 1 1\n", " 1 1 2 1 1 1 0 0 1 1 0 1 1 … 1 2 1 1 2 2 0 1 0 2 0 0\n", " 1 1 1 2 0 0 0 1 0 1 0 0 0 0 0 2 1 1 0 1 2 1 0 1 1\n", " 1 0 1 0 0 0 0 1 1 2 0 2 1 2 0 2 2 0 2 1 2 1 2 0 1\n", " ⋮ ⋮ ⋮ ⋱ ⋮ ⋮ \n", " 2 1 1 2 1 1 2 2 2 1 2 1 0 1 0 2 1 1 1 1 1 0 1 0 2\n", " 1 2 1 0 2 1 1 1 1 1 0 1 2 1 0 1 1 1 1 1 2 1 1 1 1\n", " 0 0 1 1 2 1 0 2 0 1 0 1 0 … 2 2 1 0 1 0 2 1 1 1 2 0\n", " 0 2 2 0 2 0 2 2 2 2 0 2 2 0 1 2 1 0 1 1 2 1 1 0 1\n", " 1 2 1 2 1 2 1 0 2 1 1 1 2 1 1 1 2 1 1 1 0 0 1 0 2\n", " 0 0 2 1 0 0 1 1 1 1 0 2 0 2 0 0 2 2 2 0 0 0 2 0 2\n", " 1 0 1 0 1 0 0 0 0 1 0 2 1 2 0 1 1 0 2 2 1 0 2 0 2\n", " 1 0 1 1 1 2 1 1 1 1 1 0 1 … 2 1 2 1 1 1 1 2 1 0 1 1\n", " 0 1 0 0 2 1 2 0 1 2 0 0 1 0 2 1 1 0 1 1 1 1 1 1 1\n", " 0 0 1 0 1 0 2 1 2 0 2 0 0 0 0 1 1 2 0 1 1 2 2 2 2\n", " 1 1 2 2 1 2 0 0 2 1 1 0 1 0 1 1 0 0 0 1 1 0 1 0 0\n", " 1 0 2 1 0 2 1 2 1 2 0 0 0 1 1 2 2 2 0 1 2 1 0 2 1" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "animals = concatCohorts(sires1,dams1)\n", "M = getOurGenotypes(animals)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Randomly sample QTL positions " ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [], "source": [ "nQTL = 50\n", "selQTL = fill(false,numLoci)\n", "selQTL[sample(1:numLoci,nQTL,replace=false)] = true \n", "selMrk = !selQTL;" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## QTL and marker matrices" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "500x50 Array{Int64,2}:\n", " 1 2 1 0 2 0 0 2 1 0 0 0 2 … 2 1 0 2 1 1 0 0 1 1 1 2\n", " 2 1 1 1 0 2 0 1 0 1 2 0 1 2 2 1 2 1 1 2 1 1 0 1 2\n", " 1 2 2 2 0 1 1 2 1 2 0 1 1 1 0 2 1 0 0 1 0 1 1 1 0\n", " 2 2 1 2 1 1 1 2 0 1 1 0 1 1 1 2 1 2 1 1 1 0 2 1 1\n", " 0 0 1 0 1 1 2 2 1 2 1 1 1 2 1 2 2 1 0 2 0 0 2 0 1\n", " 2 2 1 1 1 1 2 2 1 2 0 1 1 … 1 1 2 2 0 0 1 0 0 0 1 1\n", " 2 1 1 2 0 0 2 2 2 2 0 1 2 1 1 0 0 1 1 0 1 1 1 1 1\n", " 0 1 0 1 1 1 1 2 0 2 2 2 2 2 1 1 2 1 2 0 1 0 1 1 2\n", " 0 1 0 2 0 1 1 2 2 2 1 0 1 0 0 0 1 0 1 2 0 1 2 1 2\n", " 0 0 0 1 1 1 0 2 2 1 1 0 0 0 2 0 1 2 0 1 0 1 1 0 2\n", " 1 2 1 1 2 2 2 1 1 1 1 2 2 … 1 0 0 1 1 2 1 2 0 1 0 1\n", " 1 1 1 2 0 0 1 2 1 1 1 1 2 2 0 0 1 1 1 2 1 1 1 2 2\n", " 2 1 2 2 1 1 1 2 0 0 1 0 1 2 0 1 2 0 0 1 1 0 2 0 1\n", " ⋮ ⋮ ⋮ ⋱ ⋮ ⋮ \n", " 1 2 1 1 1 1 1 2 2 2 0 0 1 1 0 2 1 2 1 1 0 1 2 1 1\n", " 1 1 2 1 1 2 1 1 2 1 2 0 1 1 2 0 2 1 1 1 0 0 1 1 2\n", " 1 1 0 2 0 2 1 1 1 2 2 0 0 … 0 1 1 0 1 2 2 0 1 2 0 0\n", " 2 0 2 1 2 2 0 1 2 0 1 1 2 2 0 2 2 2 1 0 1 1 2 1 1\n", " 1 2 0 1 2 1 1 1 1 2 2 1 0 1 1 1 1 1 1 1 1 1 0 2 1\n", " 1 0 2 1 1 0 0 2 0 2 1 0 0 1 1 2 0 1 0 1 1 2 0 2 0\n", " 1 2 2 2 1 2 0 2 1 0 1 1 2 2 2 1 0 1 2 1 2 0 2 1 0\n", " 1 0 0 0 1 1 0 1 1 0 1 0 1 … 2 0 1 2 2 2 1 0 1 2 1 2\n", " 2 2 1 0 0 1 1 2 1 1 2 0 1 2 0 2 0 2 2 2 0 0 0 2 1\n", " 0 1 2 1 0 0 0 1 0 0 2 0 2 1 1 0 1 0 1 0 0 0 1 0 1\n", " 1 2 1 1 0 0 1 1 1 1 2 1 1 1 0 2 1 2 2 2 0 1 1 1 2\n", " 2 1 0 2 0 2 1 0 1 0 2 1 1 1 0 2 2 1 1 1 0 0 2 1 2" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Q = M[:,selQTL]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "500x1950 Array{Int64,2}:\n", " 0 1 1 0 2 1 1 1 1 1 1 0 0 … 2 2 2 1 1 0 1 1 2 1 1 2\n", " 1 1 2 0 1 1 1 0 1 0 0 1 1 0 1 0 1 2 0 1 2 2 1 2 1\n", " 2 1 0 2 1 1 1 2 1 1 1 0 1 1 0 2 1 0 1 1 2 1 2 0 0\n", " 1 1 1 1 1 2 1 1 2 2 1 2 1 0 0 1 2 2 0 1 2 1 1 2 1\n", " 0 0 1 2 0 1 0 1 1 1 1 0 1 1 2 2 2 2 2 0 1 0 1 1 2\n", " 0 0 1 0 0 1 0 2 2 0 1 1 2 … 0 2 0 1 1 1 0 2 1 0 2 1\n", " 2 0 0 2 1 1 1 1 1 0 0 1 1 1 1 0 2 0 2 0 1 0 0 1 0\n", " 1 2 1 2 1 1 1 1 1 1 0 1 2 1 1 1 0 2 1 1 2 2 1 2 2\n", " 2 1 1 1 2 1 1 2 1 1 2 1 0 2 1 0 1 1 1 0 1 1 2 1 1\n", " 1 0 2 2 1 1 1 1 2 1 2 0 0 0 1 1 1 2 1 1 2 1 1 1 1\n", " 1 1 2 1 1 1 0 0 1 0 1 1 1 … 1 2 1 1 2 2 0 1 0 2 0 0\n", " 1 1 1 2 0 0 0 1 0 0 0 0 1 0 0 2 1 1 0 1 2 1 0 1 1\n", " 1 0 1 0 0 0 0 1 1 0 2 1 1 2 0 2 2 0 2 1 2 1 2 0 1\n", " ⋮ ⋮ ⋮ ⋱ ⋮ ⋮ \n", " 2 1 1 2 1 1 2 2 2 2 1 0 0 1 0 2 1 1 1 1 1 0 1 0 2\n", " 1 2 1 0 2 1 1 1 1 0 1 2 2 1 0 1 1 1 1 1 2 1 1 1 1\n", " 0 0 1 1 2 1 0 2 0 0 1 0 0 … 2 2 1 0 1 0 2 1 1 1 2 0\n", " 0 2 2 0 2 0 2 2 2 0 2 2 2 0 1 2 1 0 1 1 2 1 1 0 1\n", " 1 2 1 2 1 2 1 0 2 1 1 2 1 1 1 1 2 1 1 1 0 0 1 0 2\n", " 0 0 2 1 0 0 1 1 1 0 2 0 0 2 0 0 2 2 2 0 0 0 2 0 2\n", " 1 0 1 0 1 0 0 0 0 0 2 1 1 2 0 1 1 0 2 2 1 0 2 0 2\n", " 1 0 1 1 1 2 1 1 1 1 0 1 1 … 2 1 2 1 1 1 1 2 1 0 1 1\n", " 0 1 0 0 2 1 2 0 1 0 0 1 2 0 2 1 1 0 1 1 1 1 1 1 1\n", " 0 0 1 0 1 0 2 1 2 2 0 0 1 0 0 1 1 2 0 1 1 2 2 2 2\n", " 1 1 2 2 1 2 0 0 2 1 0 1 2 0 1 1 0 0 0 1 1 0 1 0 0\n", " 1 0 2 1 0 2 1 2 1 0 0 0 1 1 1 2 2 2 0 1 2 1 0 2 1" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = M[:,selMrk]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Simulation of breeding values and phenotypic values" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "genetic variance = 25.00 " ] } ], "source": [ "nQTL = size(Q,2)\n", "nObs = size(Q,1)\n", "α = rand(Normal(0,1),nQTL)\n", "a = Q*α\n", "# scaling breeding values to have variance 25.0\n", "v = var(a)\n", "genVar = 25.0\n", "a *= sqrt(genVar/v)\n", "va = var(a)\n", "# formatted printing\n", "@printf \"genetic variance = %8.2f \" va" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "phenotypic mean = 108.39 \n", "phenotypic variance = 91.74 \n" ] } ], "source": [ "resVar = 75.0\n", "resStd = sqrt(resVar)\n", "e = rand(Normal(0,resStd),nObs)\n", "y = 100 + a + e\n", "@printf \"phenotypic mean = %8.2f \\n\" Base.mean(y)\n", "@printf \"phenotypic variance = %8.2f \\n\" var(y)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Simulate Crossbreeding using XSim" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false, "scrolled": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generation 2: sampling 250 males and 250 females\n", "Generation 3: sampling 250 males and 250 females\n", "Generation 4: sampling 250 males and 250 females\n", "Generation 5: sampling 250 males and 250 females\n", "Generation 6: sampling 250 males and 250 females\n", "Generation 7: sampling 250 males and 250 females\n", "Generation 8: sampling 250 males and 250 females\n", "Generation 9: sampling 250 males and 250 females\n", "Generation 10: sampling 250 males and 250 females\n", "Generation 11: sampling 250 males and 250 females\n", "Generation 12: sampling 250 males and 250 females\n", "Generation 13: sampling 250 males and 250 females\n", "Generation 14: sampling 250 males and 250 females\n", "Generation 15: sampling 250 males and 250 females\n", "Generation 16: sampling 250 males and 250 females\n", "Generation 17: sampling 250 males and 250 females\n", "Generation 18: sampling 250 males and 250 females\n", "Generation 19: sampling 250 males and 250 females\n", "Generation 20: sampling 250 males and 250 females\n", "Generation 21: sampling 250 males and 250 females\n", "Generation 2: sampling 250 males and 250 females\n", "Generation 3: sampling 250 males and 250 females\n", "Generation 4: sampling 250 males and 250 females\n", "Generation 5: sampling 250 males and 250 females\n", "Generation 6: sampling 250 males and 250 females\n", "Generation 7: sampling 250 males and 250 females\n", "Generation 8: sampling 250 males and 250 females\n", "Generation 9: sampling 250 males and 250 females\n", "Generation 10: sampling 250 males and 250 females\n", "Generation 11: sampling 250 males and 250 females\n", "Generation 12: sampling 250 males and 250 females\n", "Generation 13: sampling 250 males and 250 females\n", "Generation 14: sampling 250 males and 250 females\n", "Generation 15: sampling 250 males and 250 females\n", "Generation 16: sampling 250 males and 250 females\n", "Generation 17: sampling 250 males and 250 females\n", "Generation 18: sampling 250 males and 250 females\n", "Generation 19: sampling 250 males and 250 females\n", "Generation 20: sampling 250 males and 250 females\n", "Generation 21: sampling 250 males and 250 females\n", "Generation 22: sampling 250 males and 250 females\n", "Generation 23: sampling 250 males and 250 females\n", "Generation 24: sampling 250 males and 250 females\n", "Generation 25: sampling 250 males and 250 females\n", "Generation 26: sampling 250 males and 250 females\n", "Generation 27: sampling 250 males and 250 females\n", "Generation 28: sampling 250 males and 250 females\n", "Generation 29: sampling 250 males and 250 females\n", "Generation 30: sampling 250 males and 250 females\n", "Generation 31: sampling 250 males and 250 females\n", "Generation 32: sampling 250 males and 250 females\n", "Generation 33: sampling 250 males and 250 females\n", "Generation 34: sampling 250 males and 250 females\n", "Generation 35: sampling 250 males and 250 females\n", "Generation 36: sampling 250 males and 250 females\n", "Generation 37: sampling 250 males and 250 females\n", "Generation 38: sampling 250 males and 250 females\n", "Generation 39: sampling 250 males and 250 females\n", "Generation 40: sampling 250 males and 250 females\n", "Generation 41: sampling 250 males and 250 females\n" ] } ], "source": [ "ngen,popSize = 20,500\n", "siresA,damsA,gen1 = sampleRan(popSize, ngen, sires, dams)\n", "siresB,damsB,gen1 = sampleRan(popSize, ngen, sires, dams)\n", "siresAB,damsAB,gen1 = sampleRan(popSize, ngen, siresA, damsB,gen=gen1);" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Calculate and plot gene frequencies " ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "freq=Base.mean(M,1)/2;" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " Gene Frequency\n", " \n", " \n", " 0.0\n", " 0.2\n", " 0.4\n", " 0.6\n", " 0.8\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 0\n", " 10\n", " 20\n", " 30\n", " 40\n", " 50\n", " \n", " \n", " Frequency\n", " \n", " \n", " Distribution of Gene Frequency\n", " \n", "\n", "\n", "\n", " \n", "\n", "\n" ], "text/html": [ "\n", "\n", "\n", " \n", " Gene Frequency\n", " \n", " \n", " -1.0\n", " -0.8\n", " -0.6\n", " -0.4\n", " -0.2\n", " 0.0\n", " 0.2\n", " 0.4\n", " 0.6\n", " 0.8\n", " 1.0\n", " 1.2\n", " 1.4\n", " 1.6\n", " 1.8\n", " -0.85\n", " -0.80\n", " -0.75\n", " -0.70\n", " -0.65\n", " -0.60\n", " -0.55\n", " -0.50\n", " -0.45\n", " -0.40\n", " -0.35\n", " -0.30\n", " -0.25\n", " -0.20\n", " -0.15\n", " -0.10\n", " -0.05\n", " 0.00\n", " 0.05\n", " 0.10\n", " 0.15\n", " 0.20\n", " 0.25\n", " 0.30\n", " 0.35\n", " 0.40\n", " 0.45\n", " 0.50\n", " 0.55\n", " 0.60\n", " 0.65\n", " 0.70\n", " 0.75\n", " 0.80\n", " 0.85\n", " 0.90\n", " 0.95\n", " 1.00\n", " 1.05\n", " 1.10\n", " 1.15\n", " 1.20\n", " 1.25\n", " 1.30\n", " 1.35\n", " 1.40\n", " 1.45\n", " 1.50\n", " 1.55\n", " 1.60\n", " -1\n", " 0\n", " 1\n", " 2\n", " -0.85\n", " -0.80\n", " -0.75\n", " -0.70\n", " -0.65\n", " -0.60\n", " -0.55\n", " -0.50\n", " -0.45\n", " -0.40\n", " -0.35\n", " -0.30\n", " -0.25\n", " -0.20\n", " -0.15\n", " -0.10\n", " -0.05\n", " 0.00\n", " 0.05\n", " 0.10\n", " 0.15\n", " 0.20\n", " 0.25\n", " 0.30\n", " 0.35\n", " 0.40\n", " 0.45\n", " 0.50\n", " 0.55\n", " 0.60\n", " 0.65\n", " 0.70\n", " 0.75\n", " 0.80\n", " 0.85\n", " 0.90\n", " 0.95\n", " 1.00\n", " 1.05\n", " 1.10\n", " 1.15\n", " 1.20\n", " 1.25\n", " 1.30\n", " 1.35\n", " 1.40\n", " 1.45\n", " 1.50\n", " 1.55\n", " 1.60\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " -60\n", " -50\n", " -40\n", " -30\n", " -20\n", " -10\n", " 0\n", " 10\n", " 20\n", " 30\n", " 40\n", " 50\n", " 60\n", " 70\n", " 80\n", " 90\n", " 100\n", " 110\n", " -50\n", " -48\n", " -46\n", " -44\n", " -42\n", " -40\n", " -38\n", " -36\n", " -34\n", " -32\n", " -30\n", " -28\n", " -26\n", " -24\n", " -22\n", " -20\n", " -18\n", " -16\n", " -14\n", " -12\n", " -10\n", " -8\n", " -6\n", " -4\n", " -2\n", " 0\n", " 2\n", " 4\n", " 6\n", " 8\n", " 10\n", " 12\n", " 14\n", " 16\n", " 18\n", " 20\n", " 22\n", " 24\n", " 26\n", " 28\n", " 30\n", " 32\n", " 34\n", " 36\n", " 38\n", " 40\n", " 42\n", " 44\n", " 46\n", " 48\n", " 50\n", " 52\n", " 54\n", " 56\n", " 58\n", " 60\n", " 62\n", " 64\n", " 66\n", " 68\n", " 70\n", " 72\n", " 74\n", " 76\n", " 78\n", " 80\n", " 82\n", " 84\n", " 86\n", " 88\n", " 90\n", " 92\n", " 94\n", " 96\n", " 98\n", " 100\n", " -50\n", " 0\n", " 50\n", " 100\n", " -50\n", " -45\n", " -40\n", " -35\n", " -30\n", " -25\n", " -20\n", " -15\n", " -10\n", " -5\n", " 0\n", " 5\n", " 10\n", " 15\n", " 20\n", " 25\n", " 30\n", " 35\n", " 40\n", " 45\n", " 50\n", " 55\n", " 60\n", " 65\n", " 70\n", " 75\n", " 80\n", " 85\n", " 90\n", " 95\n", " 100\n", " \n", " \n", " Frequency\n", " \n", " \n", " Distribution of Gene Frequency\n", " \n", "\n", "\n", "\n", " \n", "\n", "\n", "\n", "\n" ], "text/plain": [ "Plot(...)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "using Gadfly\n", "plot(x=freq,Geom.histogram,\n", "Guide.title(\"Distribution of Gene Frequency\"),\n", "Guide.ylabel(\"Frequency\"),\n", "Guide.xlabel(\"Gene Frequency\"))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false, "scrolled": true, "slideshow": { "slide_type": "skip" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[NbConvertApp] Converting notebook dataSimulation.ipynb to slides\n", "[NbConvertApp] Writing 411959 bytes to dataSimulation.slides.html\n" ] } ], "source": [ ";ipython nbconvert --to slides dataSimulation.ipynb" ] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Julia 0.4.0", "language": "julia", "name": "julia-0.4" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "0.4.0" } }, "nbformat": 4, "nbformat_minor": 0 }