(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.weblas = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o=0}function fromArray(e,t,l){var a,r,c=[];l?(c[1]=e.length,c[0]=e[0].length):(c[0]=e.length,c[1]=e[0].length),r=c[1],t=t||Float32Array,a=new t(c[0]*c[1]);for(var u=0;ur;r++)for(var c=0;t>c;c++)a[c*e+r]=l[r*t+c];return a}var globals=require("./lib/globals"),pipeline=require("./lib/pipeline"),SGEMMCalculator=require("./lib/sgemmcalculator"),SAXPYCalculator=require("./lib/saxpycalculator"),SSCALCalculator=require("./lib/sscalcalculator"),SDWNSCalculator=require("./lib/sdwnscalculator"),SCLMPCalculator=require("./lib/sclmpcalculator"),test=require("./lib/test"),gl=globals.gl,sgemmcalculator=new SGEMMCalculator(gl),saxpycalculator=new SAXPYCalculator(gl),sscalcalculator=new SSCALCalculator(gl),sdwnscalculator=new SDWNSCalculator(gl),sclmpcalculator=new SCLMPCalculator(gl);module.exports={saxpy:saxpy,sscal:sscal,sgemm:sgemm,sstd:sstd,sdwns:sdwns,sclmp:sclmp,pipeline:pipeline,gpu:{gl:gl,sgemm:pipeline.sgemmcalculator.calculate.bind(pipeline.sgemmcalculator),sscal:pipeline.sscalcalculator.calculate.bind(pipeline.sscalcalculator),sclmp:pipeline.sclmpcalculator.calculate.bind(pipeline.sclmpcalculator),sdwns:pipeline.sdwnscalculator.calculate.bind(pipeline.sdwnscalculator),encode:gl.encode.bind(gl)},util:{fromArray:fromArray,transpose:transpose},test:test},String.prototype.format||(String.prototype.format=function(){var e=arguments;return this.replace(/{(\d+)}/g,function(t,l){return"undefined"!=typeof e[l]?e[l]:t})}); },{"./lib/globals":2,"./lib/pipeline":3,"./lib/saxpycalculator":4,"./lib/sclmpcalculator":5,"./lib/sdwnscalculator":6,"./lib/sgemmcalculator":7,"./lib/sscalcalculator":8,"./lib/test":10}],2:[function(require,module,exports){ var WebGL=require("./webgl"),gl=new WebGL;module.exports={gl:gl}; },{"./webgl":11}],3:[function(require,module,exports){ function sscal(l,a,e){var r=e.shape[0],s=e.shape[1],c=new Tensor([r,s],null);return sscalcalculator.calculate(r,s,l,a,e.texture,c.texture),c}function sgemm(l,a,e,r,s){if(e.shape[1]!==a.shape[1])throw new Error("Second dimension must be of same size for input Tensors (second Tensor is transposed).");var c,t=a.shape[0],u=e.shape[0],o=a.shape[1];c=s?s.texture:null;var n=new Tensor([t,u],null);return sgemmcalculator.calculate(t,u,o,l,a.texture,e.texture,r,c,n.texture),n}function sdwns(l,a,e,r){if(r.shape[1]%l!==0)throw new Error("Second dimension of tensor must be a multiple of channels");var s=r.shape[0],c=r.shape[1]/l,t=Math.floor((s-a)/e)+1,u=Math.floor((c-a)/e)+1,o=new Tensor([t,u*l],null);return sdwnscalculator.calculate(s,c,l,a,e,r.texture,o.texture),o}function sclmp(l,a,e){l=null!=l?l:Number.MIN_VALUE,a=null!=a?a:Number.MAX_VALUE;var r=e.shape[0],s=e.shape[1],c=new Tensor([r,s],null);return sclmpcalculator.calculate(r,s,l,a,e.texture,c.texture),c}var globals=require("./globals"),SGEMMCalculator=require("./sgemmcalculator"),SAXPYCalculator=require("./saxpycalculator"),SSCALCalculator=require("./sscalcalculator"),SDWNSCalculator=require("./sdwnscalculator"),SCLMPCalculator=require("./sclmpcalculator"),Tensor=require("./tensor"),gl=globals.gl,sgemmcalculator=new SGEMMCalculator(gl,!1),saxpycalculator=new SAXPYCalculator(gl,!1),sscalcalculator=new SSCALCalculator(gl,!1),sdwnscalculator=new SDWNSCalculator(gl,!1),sclmpcalculator=new SCLMPCalculator(gl,!1);module.exports={Tensor:Tensor,sscal:sscal,sgemm:sgemm,sdwns:sdwns,sclmp:sclmp,sgemmcalculator:sgemmcalculator,saxpycalculator:saxpycalculator,sscalcalculator:sscalcalculator,sdwnscalculator:sdwnscalculator,sclmpcalculator:sclmpcalculator}; },{"./globals":2,"./saxpycalculator":4,"./sclmpcalculator":5,"./sdwnscalculator":6,"./sgemmcalculator":7,"./sscalcalculator":8,"./tensor":9}],4:[function(require,module,exports){ function SAXPYCalculator(t,n){this.webgl=t,this.standalone=n||!0;var e="#define GLSLIFY 1\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform sampler2D Y; // texture with data from padded transpose of B\nuniform int N;\nuniform float a; // coefficient to multiplication\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1540259130(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\n// select an element from a vector based on index\nfloat select_index_1604150559(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row = outTex.y;\n float col = outTex.x;\n\n // direct usage of col requires output be padded exactly like input\n vec4 x = texture2D( X, vec2(col, row));\n vec4 y = texture2D( Y, vec2(col, row));\n vec4 sum_v = (a * x) + y;\n int channel = int(mod(col * float(N), 4.0 ));\n float sum = select_index_1604150559(sum_v, channel);\n\n if (sum == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // output vec4 with bytes for an IEEE754 32-bit floating point number\n gl_FragColor = encode_float_1540259130(sum);\n}\n";this.standalone?this.program=this.webgl.createProgram(e):this.program=this.webgl.createProgram(p)}var WebGL=require("./webgl");module.exports=SAXPYCalculator,SAXPYCalculator.TEXTURE_UNIFORM_NAME_0="X",SAXPYCalculator.TEXTURE_UNIFORM_NAME_1="Y",SAXPYCalculator.LENGTH_UNIFORM_NAME="N",SAXPYCalculator.COEFFICIENT_UNIFORM_NAME="a",SAXPYCalculator.prototype.calculate=function(t,n,e,o,a){var l=this.webgl.context;this.webgl.selectProgram(this.program),this.bindInputTexture(e,l.TEXTURE0,SAXPYCalculator.TEXTURE_UNIFORM_NAME_0),this.bindInputTexture(o,l.TEXTURE1,SAXPYCalculator.TEXTURE_UNIFORM_NAME_1);var i=this.webgl.getPad(t);this.bindUniforms(t+i,n),this.webgl.bindOutputTexture(1,t+i,a),l.drawElements(l.TRIANGLES,6,l.UNSIGNED_SHORT,0),this.webgl.unbindInputTexture(l.TEXTURE0),this.webgl.unbindInputTexture(l.TEXTURE1)},SAXPYCalculator.prototype.bindInputTexture=function(t,n,e){var o=this.webgl.context,a=this.program;o.activeTexture(n),o.bindTexture(o.TEXTURE_2D,t);var l=o.getUniformLocation(a,e);o.uniform1i(l,n-o.TEXTURE0)},SAXPYCalculator.prototype.bindUniforms=function(t,n){var e=this.webgl.context,o=e.getUniformLocation(this.program,SAXPYCalculator.LENGTH_UNIFORM_NAME),a=e.getUniformLocation(this.program,SAXPYCalculator.COEFFICIENT_UNIFORM_NAME);e.uniform1i(o,t),e.uniform1f(a,n)}; },{"./webgl":11}],5:[function(require,module,exports){ function SCLMPCalculator(n,t){this.webgl=n,this.standalone=null!=t?t:!0;var e="#define GLSLIFY 1\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform int N; // number of columns\nuniform int pad; // additional columns to nearest multiple of four\nuniform float a; // lower bound\nuniform float b; // upper bound\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1540259130(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\n// select an element from a vector based on index\nfloat select_index_1604150559(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row = outTex.y;\n float col = outTex.x;\n\n // return 0.0 if in padded region of output texture\n if(col * float(N + pad) > float(N) ) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // direct usage of col requires output be padded exactly like input\n vec4 x = texture2D( X, vec2(col, row));\n vec4 val = clamp(x, a, b);\n\n // select and output channel (standalone version only)\n int channel = int(mod(col * float(N + pad), 4.0));\n float sum = select_index_1604150559(val, channel);\n\n if (sum == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // output vec4 with bytes for an IEEE754 32-bit floating point number\n gl_FragColor = encode_float_1540259130(sum);\n}\n",o="#define GLSLIFY 1\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform int N; // number of columns\nuniform int pad; // additional columns to nearest multiple of four\nuniform float a; // lower bound\nuniform float b; // upper bound\n\n// set pad values to 0.0, if in padded region of output texture\nvoid fix_pad_1540259130(inout vec4 v, int pad){\n v.a = 0.0;\n if(pad == 2){\n v.b = 0.0;\n } else if(pad == 3){\n v.b = 0.0;\n v.g = 0.0;\n }\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n float col = (col_t * float(N + pad) - 2.0); // index of first element in pixel (matrix space)\n\n // direct usage of col requires output be padded exactly like input\n vec4 x = texture2D( X, vec2(col_t, row_t));\n vec4 val_v = clamp(x, a, b);\n\n // is last element in pixel past row length?\n if(pad > 0 && (col + 4.0) > float(N) ) {\n // fix elements in padded region\n fix_pad_1540259130(val_v, pad);\n }\n\n gl_FragColor = val_v;\n}\n";this.standalone?this.program=this.webgl.createProgram(e):this.program=this.webgl.createProgram(o)}var WebGL=require("./webgl");module.exports=SCLMPCalculator,SCLMPCalculator.TEXTURE_UNIFORM_NAME_0="X",SCLMPCalculator.LENGTH_UNIFORM_NAME="N",SCLMPCalculator.LOWER_UNIFORM_NAME="a",SCLMPCalculator.UPPER_UNIFORM_NAME="b",SCLMPCalculator.prototype.calculate=function(n,t,e,o,a,l){e=null!=e?e:Number.MIN_VALUE,o=null!=o?o:Number.MAX_VALUE;var i=this.webgl.context;this.webgl.selectProgram(this.program),this.bindInputTexture(a,i.TEXTURE0,SCLMPCalculator.TEXTURE_UNIFORM_NAME_0);var r=this.webgl.getPad(t);this.bindUniforms(t,r,e,o),this.standalone?this.webgl.bindOutputTexture(n,t+r,l):this.webgl.bindOutputTexture(n,(t+r)/4,l),i.drawElements(i.TRIANGLES,6,i.UNSIGNED_SHORT,0),this.webgl.unbindInputTexture(i.TEXTURE0)},SCLMPCalculator.prototype.bindInputTexture=function(n,t,e){var o=this.webgl.context,a=this.program;o.activeTexture(t),o.bindTexture(o.TEXTURE_2D,n);var l=o.getUniformLocation(a,e);o.uniform1i(l,t-o.TEXTURE0)},SCLMPCalculator.prototype.bindUniforms=function(n,t,e,o){var a=this.webgl.context,l=a.getUniformLocation(this.program,SCLMPCalculator.LENGTH_UNIFORM_NAME),i=a.getUniformLocation(this.program,SCLMPCalculator.UPPER_UNIFORM_NAME),r=a.getUniformLocation(this.program,SCLMPCalculator.LOWER_UNIFORM_NAME),u=a.getUniformLocation(this.program,"pad");a.uniform1i(l,n),a.uniform1i(u,t),a.uniform1f(r,e),a.uniform1f(i,o)}; },{"./webgl":11}],6:[function(require,module,exports){ function DownsampleCalculator(n,o){this.webgl=n,this.standalone=null!=o?o:!0;var t="#define GLSLIFY 1\n// TODO: unroll loop for stride == factor and small values (2, 3)\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform int factor; // width of image patch\nuniform float stride; // width between image patches\nuniform float C; // number of channels\nuniform float M;\nuniform float N;\nuniform float N_out;\nuniform float M_out;\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1540259130(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\n// select an element from a vector based on index\nfloat select_index_1604150559(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate and translate to output pixel space.\n float row = floor(outTex.y * M_out); // row on output texture (matrix space)\n float col = floor(outTex.x * N_out); // column on output texture (matrix space)\n float vcol = floor(col / C); // virtual column on output texture (matrix space)\n float vchannel = floor(mod(col, C)); // virtual channel on output texture\n\n const float min = -1.0e+08;\n vec4 currentMax = vec4(min, min, min, min);\n\n float deltaY = 1.0/M;\n float deltaX = 1.0/N;\n float y = ((row * stride) + 0.5)*deltaY; // texture position of input row\n float x;\n float z = vchannel * deltaX;\n for (int i = 0; i < 100; i += 1) {\n if (i >= factor) {\n break;\n }\n x = ((vcol * stride * C) + 0.5) * deltaX; // texture position of input column\n\n for (int j = 0; j < 100; j += 1) {\n if (j >= factor) {\n break;\n }\n\n vec2 coords = vec2(x + z, y);\n vec4 x_v = texture2D(X, coords);\n currentMax = max(currentMax, x_v);\n\n x += (deltaX * C);\n }\n y += deltaY;\n }\n int chan = int(mod(outTex.x * N_out, 4.0 ));\n float val = select_index_1604150559(currentMax, int(chan));\n if (val == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n gl_FragColor = encode_float_1540259130(val);\n}\n";p="#define GLSLIFY 1\n// TODO: unroll loop for stride == factor and small values (2, 3)\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded A\nuniform int factor; // width of image patch\nuniform float stride; // width between image patches\nuniform float C; // number of channels\nuniform float M;\nuniform float N;\nuniform float N_out;\nuniform float M_out;\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate and translate to output pixel space.\n float row = floor(outTex.y * M_out); // row on output texture (pixel space)\n float col = floor(outTex.x * N_out); // column on output texture (matrix space)\n float vcol = floor(col / C); // virtual column on output texture (matrix space)\n float vchannel = floor(mod(col, C)); // virtual channel on output texture\n\n const float min = -1.0e+08;\n vec4 currentMax = vec4(min, min, min, min);\n\n float deltaY = 1.0/M;\n float deltaX = 1.0/N;\n float y = ((row * stride) + 0.5)*deltaY; // texture position of input row\n float x;\n float z = vchannel * deltaX;\n for (int i = 0; i < 100; i += 1) {\n if (i >= factor) {\n break;\n }\n x = ((vcol * stride * C) + 0.5) * deltaX; // texture position of input column\n\n for (int j = 0; j < 100; j += 1) {\n if (j >= factor) {\n break;\n }\n\n vec2 coords = vec2(x + z, y);\n vec4 x_v = texture2D(X, coords);\n currentMax = max(currentMax, x_v);\n\n x += (deltaX * C);\n }\n y += deltaY;\n }\n\n gl_FragColor = currentMax;\n}\n",this.standalone?this.program=this.webgl.createProgram(t):this.program=this.webgl.createProgram(p)}var WebGL=require("./webgl");module.exports=DownsampleCalculator,DownsampleCalculator.TEXTURE_UNIFORM_NAME_0="X",DownsampleCalculator.INPUT_ROW_COUNT_UNIFORM_NAME="M",DownsampleCalculator.INPUT_COLUMN_COUNT_UNIFORM_NAME="N",DownsampleCalculator.OUTPUT_ROW_COUNT_UNIFORM_NAME="M_out",DownsampleCalculator.OUTPUT_COLUMN_COUNT_UNIFORM_NAME="N_out",DownsampleCalculator.FACTOR_UNIFORM_NAME="factor",DownsampleCalculator.STRIDE_UNIFORM_NAME="stride",DownsampleCalculator.CHANNEL_COUNT_UNIFORM_NAME="C",DownsampleCalculator.prototype.calculate=function(n,o,t,a,e,l,r){if(t%WebGL.COMPONENTS_PER_TEXEL!=0)throw new Error("Channel count must be a multiple of "+WebGL.COMPONENTS_PER_TEXEL);var i=this.webgl.context,u=(Math.floor((o-a)/e)+1)*t,f=Math.floor((n-a)/e)+1;this.webgl.selectProgram(this.program),this.bindInputTexture(l,i.TEXTURE0,DownsampleCalculator.TEXTURE_UNIFORM_NAME_0),this.bindUniforms(n,o*t,f,u,a,e,t),this.standalone?this.webgl.bindOutputTexture(f,u,r):this.webgl.bindOutputTexture(f,u/WebGL.COMPONENTS_PER_TEXEL,r),i.drawElements(i.TRIANGLES,6,i.UNSIGNED_SHORT,0),this.webgl.unbindInputTexture(i.TEXTURE0)},DownsampleCalculator.prototype.bindInputTexture=function(n,o,t){var a=this.webgl.context,e=this.program;a.activeTexture(o),a.bindTexture(a.TEXTURE_2D,n);var l=a.getUniformLocation(e,t);a.uniform1i(l,o-a.TEXTURE0)},DownsampleCalculator.prototype.bindUniforms=function(n,o,t,a,e,l,r){var i=this.webgl.context,u=i.getUniformLocation(this.program,DownsampleCalculator.INPUT_ROW_COUNT_UNIFORM_NAME),f=i.getUniformLocation(this.program,DownsampleCalculator.INPUT_COLUMN_COUNT_UNIFORM_NAME),m=i.getUniformLocation(this.program,DownsampleCalculator.OUTPUT_ROW_COUNT_UNIFORM_NAME),c=i.getUniformLocation(this.program,DownsampleCalculator.OUTPUT_COLUMN_COUNT_UNIFORM_NAME),s=i.getUniformLocation(this.program,DownsampleCalculator.FACTOR_UNIFORM_NAME),p=i.getUniformLocation(this.program,DownsampleCalculator.STRIDE_UNIFORM_NAME),d=i.getUniformLocation(this.program,DownsampleCalculator.CHANNEL_COUNT_UNIFORM_NAME);i.uniform1f(u,n),i.uniform1f(f,o),i.uniform1f(m,t),i.uniform1f(c,a),i.uniform1i(s,e),i.uniform1f(p,l),i.uniform1f(d,r)}; },{"./webgl":11}],7:[function(require,module,exports){ function SGEMMCalculator(t,e){this.webgl=t,this.standalone=null!=e?e:!0;var n="#define GLSLIFY 1\n// fragment shader that calculates the matrix product and renders each\n// element to the bytes representing a 32-bit IEEE754 floating point in\n// the output RGBA canvas.\n// readPixel is used to read the bytes.\n\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D A; // texture with data from padded A\nuniform sampler2D B_t; // texture with data from padded transpose of B\nuniform int K; // number of elements in shared dimension\nuniform int N; // number of columns in output\nuniform int pad; //\nuniform float alpha; // coefficient to multiplication\n\n// sum of products between elements in row i (from A) x col j (from B)\n\n// Calculate the dot product between the row (from A) and column (from B)\n// identified by the passed indeces (output texture coordinate space).\n// We loop over elements in the row and column and sum the product\n// using the glsl `dot` function to process four elements at a time.\n// This four element optimization requires that the matrix B be\n// transposed before texel packing and that both matrices be padded\n// (with zeros) to a multiple of four (4) in their shared dimension.\nfloat dot_rowcol_1540259130(float y, float x, sampler2D A, sampler2D B_t, int K) {\n float delta_t = 1./float(K);// space (on texture) between elements\n float sum = 0.; // sum for this row/column pair\n float z = 0.5 * (4.0 * delta_t);// position for shared dimension on source textures\n\n for (int l=0 ; l<4096 ; ++l) {\n if(l >= K / 4) break; // stop when we finish the row/column\n // l is in pixel space, so we divide by four\n\n // retrieve next four elements from each texture\n vec4 a_ik = texture2D( A, vec2(z, y));\n vec4 b_kj = texture2D(B_t, vec2(z, x));\n\n // use `dot` to process four elements at a time\n sum += dot(a_ik, b_kj);\n z += (4.0 * delta_t); // (z + 0.5)*delta\n }\n return sum;\n}\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1604150559(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n\n // sum row x col for the passed pixel\n float sum = alpha * dot_rowcol_1540259130(row_t, col_t * float(N + pad)/float(N), A, B_t, K);\n\n if (sum == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // output vec4 with bytes for an IEEE754 32-bit floating point number\n gl_FragColor = encode_float_1604150559(sum);\n}\n",o="#define GLSLIFY 1\n// fragment shader that calculates the matrix product (with additive 'C' term)\n// and renders each element to the bytes representing a 32-bit IEEE754 floating\n// point in the output RGBA canvas.\n// readPixel is used to read the bytes.\n\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D A; // texture with data from padded A\nuniform sampler2D B_t; // texture with data from padded transpose of B\nuniform sampler2D C; // texture with data from C\nuniform int K; // number of elements in shared dimension\nuniform int N; // number of columns in output\nuniform int pad; //\nuniform float alpha; // coefficient to multiplication\nuniform float beta; // coefficient to additive term\n\n// sum of products between elements in row i (from A) x col j (from B)\n\n// Calculate the dot product between the row (from A) and column (from B)\n// identified by the passed indeces (output texture coordinate space).\n// We loop over elements in the row and column and sum the product\n// using the glsl `dot` function to process four elements at a time.\n// This four element optimization requires that the matrix B be\n// transposed before texel packing and that both matrices be padded\n// (with zeros) to a multiple of four (4) in their shared dimension.\nfloat dot_rowcol_1540259130(float y, float x, sampler2D A, sampler2D B_t, int K) {\n float delta_t = 1./float(K);// space (on texture) between elements\n float sum = 0.; // sum for this row/column pair\n float z = 0.5 * (4.0 * delta_t);// position for shared dimension on source textures\n\n for (int l=0 ; l<4096 ; ++l) {\n if(l >= K / 4) break; // stop when we finish the row/column\n // l is in pixel space, so we divide by four\n\n // retrieve next four elements from each texture\n vec4 a_ik = texture2D( A, vec2(z, y));\n vec4 b_kj = texture2D(B_t, vec2(z, x));\n\n // use `dot` to process four elements at a time\n sum += dot(a_ik, b_kj);\n z += (4.0 * delta_t); // (z + 0.5)*delta\n }\n return sum;\n}\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1604150559(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\n// select an element from a vector based on index\nfloat select_index_1117569599(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n vec4 c_vec = texture2D(C, vec2(col_t, 0.5));\n\n // should be -0.5, but that subtly breaks at zero\n float col = col_t * float(N + pad); // index of first element in pixel (matrix space)\n int channel = int(mod(col, 4.0 ));\n float c = select_index_1117569599(c_vec, channel);\n\n // sum row x col for the passed pixel\n float sum = alpha * dot_rowcol_1540259130(row_t, col_t * float(N + pad)/float(N), A, B_t, K);\n sum += beta * c;\n\n if (sum == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // output vec4 with bytes for an IEEE754 32-bit floating point number\n gl_FragColor = encode_float_1604150559(sum);\n}\n",a="#define GLSLIFY 1\n// fragment shader that calculates the matrix product and writes each\n// element to a pixel component in a floating point texture.\n// the output RGBA canvas.\n// readPixel is used to read the bytes.\n\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D A; // texture with data from padded A\nuniform sampler2D B_t; // texture with data from padded transpose of B\nuniform int K; // number of elements in shared dimension\nuniform int N; // number of columns in output\nuniform int pad; //\nuniform float alpha; // coefficient to multiplication\n\n// sum of products between elements in row i (from A) x col j (from B)\n\n// Calculate the dot product between the row (from A) and column (from B)\n// identified by the passed indeces (output texture coordinate space).\n// We loop over elements in the row and column and sum the product\n// using the glsl `dot` function to process four elements at a time.\n// This four element optimization requires that the matrix B be\n// transposed before texel packing and that both matrices be padded\n// (with zeros) to a multiple of four (4) in their shared dimension.\nfloat dot_rowcol_1540259130(float y, float x, sampler2D A, sampler2D B_t, int K) {\n float delta_t = 1./float(K);// space (on texture) between elements\n float sum = 0.; // sum for this row/column pair\n float z = 0.5 * (4.0 * delta_t);// position for shared dimension on source textures\n\n for (int l=0 ; l<4096 ; ++l) {\n if(l >= K / 4) break; // stop when we finish the row/column\n // l is in pixel space, so we divide by four\n\n // retrieve next four elements from each texture\n vec4 a_ik = texture2D( A, vec2(z, y));\n vec4 b_kj = texture2D(B_t, vec2(z, x));\n\n // use `dot` to process four elements at a time\n sum += dot(a_ik, b_kj);\n z += (4.0 * delta_t); // (z + 0.5)*delta\n }\n return sum;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n\n vec4 sum_v = vec4(0.0, 0.0, 0.0, 0.0);\n float col = (col_t * float(N + pad) - 2.0); // index of first element in pixel (matrix space)\n sum_v.r = alpha * dot_rowcol_1540259130(row_t, (col + 0.5)/float(N), A, B_t, K);\n // is last element in pixel past row length?\n if(pad > 0 && (col + 4.0) > float(N) ) {\n // compute elements in padded region\n if(pad < 3){\n sum_v.g = alpha * dot_rowcol_1540259130(row_t, (col + 1.5)/float(N), A, B_t, K);\n }\n if(pad < 2){\n sum_v.b = alpha * dot_rowcol_1540259130(row_t, (col + 2.5)/float(N), A, B_t, K);\n }\n } else {\n sum_v.g = alpha * dot_rowcol_1540259130(row_t, (col + 1.5)/float(N), A, B_t, K);\n sum_v.b = alpha * dot_rowcol_1540259130(row_t, (col + 2.5)/float(N), A, B_t, K);\n sum_v.a = alpha * dot_rowcol_1540259130(row_t, (col + 3.5)/float(N), A, B_t, K);\n }\n\n gl_FragColor = sum_v;\n}\n",r="#define GLSLIFY 1\n// fragment shader that calculates the matrix product and writes each\n// element to a pixel component in a floating point texture.\n// the output RGBA canvas.\n// readPixel is used to read the bytes.\n\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D A; // texture with data from padded A\nuniform sampler2D B_t; // texture with data from padded transpose of B\nuniform sampler2D C; // texture with data from C\nuniform int K; // number of elements in shared dimension\nuniform int N; // number of columns in output\nuniform int pad; //\nuniform float alpha; // coefficient to multiplication\nuniform float beta; // coefficient to addition\n\n// sum of products between elements in row i (from A) x col j (from B)\n\n// Calculate the dot product between the row (from A) and column (from B)\n// identified by the passed indeces (output texture coordinate space).\n// We loop over elements in the row and column and sum the product\n// using the glsl `dot` function to process four elements at a time.\n// This four element optimization requires that the matrix B be\n// transposed before texel packing and that both matrices be padded\n// (with zeros) to a multiple of four (4) in their shared dimension.\nfloat dot_rowcol_1540259130(float y, float x, sampler2D A, sampler2D B_t, int K) {\n float delta_t = 1./float(K);// space (on texture) between elements\n float sum = 0.; // sum for this row/column pair\n float z = 0.5 * (4.0 * delta_t);// position for shared dimension on source textures\n\n for (int l=0 ; l<4096 ; ++l) {\n if(l >= K / 4) break; // stop when we finish the row/column\n // l is in pixel space, so we divide by four\n\n // retrieve next four elements from each texture\n vec4 a_ik = texture2D( A, vec2(z, y));\n vec4 b_kj = texture2D(B_t, vec2(z, x));\n\n // use `dot` to process four elements at a time\n sum += dot(a_ik, b_kj);\n z += (4.0 * delta_t); // (z + 0.5)*delta\n }\n return sum;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n vec4 c_v = texture2D(C, vec2(col_t, 0.5));\n\n vec4 sum_v = vec4(0.0, 0.0, 0.0, 0.0);\n float col = (col_t * float(N + pad) - 2.0); // index of first element in pixel (matrix space)\n sum_v.r = alpha * dot_rowcol_1540259130(row_t, (col + 0.5)/float(N), A, B_t, K);\n // in the padding region?\n if(pad > 0 && (col + 4.0) > float(N) ) {\n // pad\n if(pad < 3){\n sum_v.g = alpha * dot_rowcol_1540259130(row_t, (col + 1.5)/float(N), A, B_t, K);\n }\n if(pad < 2){\n sum_v.b = alpha * dot_rowcol_1540259130(row_t, (col + 2.5)/float(N), A, B_t, K);\n }\n } else {\n sum_v.g = alpha * dot_rowcol_1540259130(row_t, (col + 1.5)/float(N), A, B_t, K);\n sum_v.b = alpha * dot_rowcol_1540259130(row_t, (col + 2.5)/float(N), A, B_t, K);\n sum_v.a = alpha * dot_rowcol_1540259130(row_t, (col + 3.5)/float(N), A, B_t, K);\n }\n\n gl_FragColor = sum_v + beta*c_v;\n}\n";this.standalone?(this.program_=this.webgl.createProgram(n),this.program_c=this.webgl.createProgram(o)):(this.program_=this.webgl.createProgram(a),this.program_c=this.webgl.createProgram(r))}var WebGL=require("./webgl");module.exports=SGEMMCalculator,SGEMMCalculator.TEXTURE_UNIFORM_NAME_0="A",SGEMMCalculator.TEXTURE_UNIFORM_NAME_1="B_t",SGEMMCalculator.TEXTURE_UNIFORM_NAME_2="C",SGEMMCalculator.SHARED_LENGTH_UNIFORM_NAME="K",SGEMMCalculator.COLUMN_COUNT_UNIFORM_NAME="N",SGEMMCalculator.PAD_UNIFORM_NAME="pad",SGEMMCalculator.ALPHA_UNIFORM_NAME="alpha",SGEMMCalculator.BETA_UNIFORM_NAME="beta",SGEMMCalculator.prototype.calculate=function(t,e,n,o,a,r,i,l,s){var u=this.webgl.context;null!=l?this.program=this.program_c:(i=null,this.program=this.program_),this.webgl.selectProgram(this.program),this.bindInputTexture(a,u.TEXTURE0,SGEMMCalculator.TEXTURE_UNIFORM_NAME_0),this.bindInputTexture(r,u.TEXTURE1,SGEMMCalculator.TEXTURE_UNIFORM_NAME_1),null!=l&&this.bindInputTexture(l,u.TEXTURE2,SGEMMCalculator.TEXTURE_UNIFORM_NAME_2);var m=this.webgl.getPad(n),d=this.webgl.getPad(e);this.bindUniforms(e,n+m,d,o,i),this.standalone?this.webgl.bindOutputTexture(t,e+d,s):this.webgl.bindOutputTexture(t,(e+d)/4,s),u.drawElements(u.TRIANGLES,6,u.UNSIGNED_SHORT,0),this.webgl.unbindInputTexture(u.TEXTURE0),this.webgl.unbindInputTexture(u.TEXTURE1),this.webgl.unbindInputTexture(u.TEXTURE2)},SGEMMCalculator.prototype.bindInputTexture=function(t,e,n){var o=this.webgl.context,a=this.program;o.activeTexture(e),o.bindTexture(o.TEXTURE_2D,t);var r=o.getUniformLocation(a,n);o.uniform1i(r,e-o.TEXTURE0)},SGEMMCalculator.prototype.bindUniforms=function(t,e,n,o,a){var r=this.webgl.context,i=r.getUniformLocation(this.program,SGEMMCalculator.SHARED_LENGTH_UNIFORM_NAME),l=r.getUniformLocation(this.program,SGEMMCalculator.ALPHA_UNIFORM_NAME),s=r.getUniformLocation(this.program,SGEMMCalculator.BETA_UNIFORM_NAME),u=r.getUniformLocation(this.program,SGEMMCalculator.COLUMN_COUNT_UNIFORM_NAME),m=m=r.getUniformLocation(this.program,SGEMMCalculator.PAD_UNIFORM_NAME);r.uniform1f(s,a),r.uniform1i(u,t),r.uniform1i(m,n),r.uniform1i(i,e),r.uniform1f(l,o)}; },{"./webgl":11}],8:[function(require,module,exports){ function SSCALCalculator(n,t){this.webgl=n,this.standalone=null!=t?t:!0;var e="#define GLSLIFY 1\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded X\nuniform int N; // number of columns\nuniform int pad; // additional columns to nearest multiple of four\nuniform float b; // additive term\nuniform float a; // multiplicative term\n\n// Render float to bytes according to IEEE 754 Floating Point\nvec4 encode_float_1540259130(float val) {\n\n // TODO: correctly handle denormal numbers\n // http://www.2ality.com/2012/04/number-encoding.html\n float a = abs(val); // encode absolute value + sign\n float exp = floor(log2(a)); // number of powers of 2\n float mant = pow(2.,log2(a)-exp) * pow(2.,23.); // multiply to fill 24 bits (implied leading 1)\n float mant1 = floor(mant / 256. / 256.); // first 8 bits of mantissa\n float mant2 = mod(floor(mant / 256.),256.); // second 8 bits\n float mant3 = mod(mant,256.); // third 8 bits\n\n highp float sign = 128.-128.*(a/val); // sign bit is 256 or 0\n highp float e = (sign+exp+127.)/510.; // exponent and sign\n highp float m1 = (mant1-(128.*(1.-mod(exp+127.,2.))))/255.; // handle leading bit\n highp float m2 = (mant2)/255.; // middle part\n highp float m3 = (mant3+.5)/255.; // scale to 0 - 255\n\n return vec4(m3,m2,m1,e);\n}\n\n// select an element from a vector based on index\nfloat select_index_1604150559(vec4 v, int index){\n float val;\n if (index == 0) {\n val = v.r;\n } else if(index == 1) {\n val = v.g;\n } else if(index == 2) {\n val = v.b;\n } else if(index == 3){\n val = v.a;\n } else {\n // should never be here\n val = 0.0;\n }\n\n return val;\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row = outTex.y;\n float col = outTex.x;\n\n // direct usage of col requires output be padded exactly like input\n vec4 x = texture2D( X, vec2(col, row));\n vec4 sum_v = (a * x) + b;\n int channel = int(mod(col * float(N + pad), 4.0 ));\n float sum = select_index_1604150559(sum_v, channel);\n\n if (sum == 0.) {\n gl_FragColor = vec4(0.,0.,0.,0.);\n return;\n }\n\n // output vec4 with bytes for an IEEE754 32-bit floating point number\n gl_FragColor = encode_float_1540259130(sum);\n}\n",o="#define GLSLIFY 1\nprecision highp float;\n\nvarying vec2 outTex; // texture coords of row/column to calculate\nuniform sampler2D X; // texture with data from padded X\nuniform int N; // number of columns\nuniform int pad; // additional columns to nearest multiple of four\nuniform float b; // additive term\nuniform float a; // multiplicative term\n\n// set pad values to 0.0, if in padded region of output texture\nvoid fix_pad_1540259130(inout vec4 v, int pad){\n v.a = 0.0;\n if(pad == 2){\n v.b = 0.0;\n } else if(pad == 3){\n v.b = 0.0;\n v.g = 0.0;\n }\n}\n\nvoid main(void) {\n\n // get the implied row and column from .y and .x of passed (output)\n // texture coordinate. These map directly to input texture space when\n // the relevant dimensions are the same.\n float row_t = outTex.y;\n float col_t = outTex.x;\n float col = (col_t * float(N + pad) - 2.0); // index of first element in pixel (matrix space)\n\n // direct usage of col requires output be padded exactly like input\n vec4 x = texture2D( X, vec2(col_t, row_t));\n vec4 sum_v = (a * x) + b;\n\n // fix padded region\n if(pad > 0 && col + 4.0 > float(N) ) {\n fix_pad_1540259130(sum_v, pad);\n }\n\n gl_FragColor = sum_v;\n}\n";this.standalone?this.program=this.webgl.createProgram(e):this.program=this.webgl.createProgram(o)}var WebGL=require("./webgl");module.exports=SSCALCalculator,SSCALCalculator.TEXTURE_UNIFORM_NAME_0="X",SSCALCalculator.LENGTH_UNIFORM_NAME="N",SSCALCalculator.ADD_UNIFORM_NAME="b",SSCALCalculator.MUL_UNIFORM_NAME="a",SSCALCalculator.prototype.calculate=function(n,t,e,o,a,i){var l=this.webgl.context,r=t%WebGL.COMPONENTS_PER_TEXEL,u=0==r?0:WebGL.COMPONENTS_PER_TEXEL-r;this.webgl.selectProgram(this.program),this.bindInputTexture(a,l.TEXTURE0,SSCALCalculator.TEXTURE_UNIFORM_NAME_0),this.bindUniforms(t,u,e,o),this.standalone?this.webgl.bindOutputTexture(n,t+u,i):this.webgl.bindOutputTexture(n,(t+u)/4,i),l.drawElements(l.TRIANGLES,6,l.UNSIGNED_SHORT,0),this.webgl.unbindInputTexture(l.TEXTURE0)},SSCALCalculator.prototype.bindInputTexture=function(n,t,e){var o=this.webgl.context,a=this.program;o.activeTexture(t),o.bindTexture(o.TEXTURE_2D,n);var i=o.getUniformLocation(a,e);o.uniform1i(i,t-o.TEXTURE0)},SSCALCalculator.prototype.bindUniforms=function(n,t,e,o){var a=this.webgl.context,i=a.getUniformLocation(this.program,SSCALCalculator.LENGTH_UNIFORM_NAME),l=a.getUniformLocation(this.program,SSCALCalculator.ADD_UNIFORM_NAME),r=a.getUniformLocation(this.program,SSCALCalculator.MUL_UNIFORM_NAME),u=a.getUniformLocation(this.program,"pad");a.uniform1i(i,n),a.uniform1i(u,t),a.uniform1f(r,e),a.uniform1f(l,o)}; },{"./webgl":11}],9:[function(require,module,exports){ function Tensor(e,t){if(2!=e.length)throw new Error("Only Tensor of order two (matrix) is supported right now.");var r=e[0],s=e[1];this.texture=gl.createDataTexture(r,s,t),this.shape=[r,s]}var globals=require("./globals"),gl=globals.gl;module.exports=Tensor,Tensor.prototype["delete"]=function(){gl.context.deleteTexture(this.texture),this.texture=null,this.shape=null},Tensor.prototype.transfer=function(e){var t,r,s=this.shape[0],o=this.shape[1];return t=gl.createOutputTexture(s,o),gl.encode(s,o,this.texture,t),r=new Float32Array(gl.readData(s,o)),gl.context.deleteTexture(t),e||this["delete"](),r},Tensor.prototype.reshape=function(e,t){var r=this.shape[0],s=this.shape[1],o=e[0],n=e[1],l=new Tensor(e,null);return gl.reshape(r,s,o,n,this.texture,l.texture),t||this["delete"](),l},Tensor.prototype.transpose=function(e){var t=this.shape[0],r=this.shape[1],s=new Tensor([r,t],null);return gl.transpose(t,r,this.texture,s.texture),e||this["delete"](),s}; },{"./globals":2}],10:[function(require,module,exports){ var async=require("async"),loader=require("floader");test={},test.allclose=function(e,t,r,a){if(r=r||1e-5,a=a||1e-8,e.length!=t.length)return console.log("lengths not equal: "+e.length+", "+t.length),{result:!1,index:null};for(var n,l=0;la;a++){for(var n=[],l=0;t>l;l++)n[l]=Math.random()/Math.sqrt(e);r.push(n)}return r},test.padData=function(e,t,r,a){for(var n=new Float32Array(e*(t+r)),l=0;e>l;l++)n.set(a.subarray(l*t,(l+1)*t),l*(t+r));return n},test.load=function(e,t,r){var a=t.map(function(t){return e+t});async.map(a,loader.load,function(e,t){if(e)return r(e);var a=t.map(JSON.parse);r(e,a)})},test.assert={},test.assert.allclose=function(e,t,r,a,n,l){var s=test.allclose(t,r,n,l),o="[",u="[";if(!s.result){s.index>1&&(o+="..., ",u+="..., "),s.index>0&&(o+=t[s.index-1]+", ",u+=r[s.index-1]+", "),o+="-->",u+="-->";for(var i=s.index;i_;_++)d=_*t,full_texel_row_end=d+f,s=new Float32Array(n.buffer,d*n.BYTES_PER_ELEMENT,f),f>0&&o.texSubImage2D(o.TEXTURE_2D,0,0,_,u,1,o.RGBA,o.FLOAT,s),c=new Float32Array(n.buffer,full_texel_row_end*n.BYTES_PER_ELEMENT,i),x.set(c),o.texSubImage2D(o.TEXTURE_2D,0,u,_,1,1,o.RGBA,o.FLOAT,x)}return o.texParameteri(o.TEXTURE_2D,o.TEXTURE_WRAP_S,o.CLAMP_TO_EDGE),o.texParameteri(o.TEXTURE_2D,o.TEXTURE_WRAP_T,o.CLAMP_TO_EDGE),o.texParameteri(o.TEXTURE_2D,o.TEXTURE_MAG_FILTER,o.NEAREST),o.texParameteri(o.TEXTURE_2D,o.TEXTURE_MIN_FILTER,o.NEAREST),o.bindTexture(o.TEXTURE_2D,null),l},WebGL.prototype.createOutputTexture=function(e,t){var n=this.context,o=this.getPad(t),r=n.createTexture();return n.bindTexture(n.TEXTURE_2D,r),n.texImage2D(n.TEXTURE_2D,0,n.RGBA,t+o,e,0,n.RGBA,n.UNSIGNED_BYTE,null),n.texParameteri(n.TEXTURE_2D,n.TEXTURE_WRAP_S,n.CLAMP_TO_EDGE),n.texParameteri(n.TEXTURE_2D,n.TEXTURE_WRAP_T,n.CLAMP_TO_EDGE),n.texParameteri(n.TEXTURE_2D,n.TEXTURE_MAG_FILTER,n.NEAREST),n.texParameteri(n.TEXTURE_2D,n.TEXTURE_MIN_FILTER,n.NEAREST),n.bindTexture(n.TEXTURE_2D,null),r},WebGL.prototype.bindOutputTexture=function(e,t,n){var o=this.context;if(this.canvas.height=e,this.canvas.width=t,o.viewport(0,0,t,e),this.framebuffer=this.framebuffer||o.createFramebuffer(),o.bindFramebuffer(o.FRAMEBUFFER,this.framebuffer),o.framebufferTexture2D(o.FRAMEBUFFER,o.COLOR_ATTACHMENT0,o.TEXTURE_2D,n,0),o.checkFramebufferStatus(o.FRAMEBUFFER)!=o.FRAMEBUFFER_COMPLETE)throw new Error("Bound framebuffer is not complete.");return this.framebuffer},WebGL.prototype.unbindInputTexture=function(e){var t=this.context;t.activeTexture(e),t.bindTexture(t.TEXTURE_2D,null)},WebGL.prototype.readData=function(e,t){var n=this.context;return rawbuffer=new ArrayBuffer(e*t*Float32Array.BYTES_PER_ELEMENT),prod=new Uint8Array(rawbuffer),n.readPixels(0,0,t,e,n.RGBA,n.UNSIGNED_BYTE,prod),rawbuffer},WebGL.prototype.getPad=function(e){var t=e%WebGL.COMPONENTS_PER_TEXEL,n=0==t?0:WebGL.COMPONENTS_PER_TEXEL-t;return n}; },{}],12:[function(require,module,exports){ (function (process,global){ !function(){function n(){}function t(n){return n}function e(n){return!!n}function r(n){return!n}function u(n){return function(){if(null===n)throw new Error("Callback was already called.");n.apply(this,arguments),n=null}}function i(n){return function(){null!==n&&(n.apply(this,arguments),n=null)}}function o(n){return M(n)||"number"==typeof n.length&&n.length>=0&&n.length%1===0}function c(n,t){for(var e=-1,r=n.length;++er?r:null}):(e=W(n),t=e.length,function(){return r++,t>r?e[r]:null})}function m(n,t){return t=null==t?n.length-1:+t,function(){for(var e=Math.max(arguments.length-t,0),r=Array(e),u=0;e>u;u++)r[u]=arguments[u+t];switch(t){case 0:return n.call(this,r);case 1:return n.call(this,arguments[0],r)}}}function y(n){return function(t,e,r){return n(t,r)}}function v(t){return function(e,r,o){o=i(o||n),e=e||[];var c=h(e);if(0>=t)return o(null);var a=!1,f=0,l=!1;!function s(){if(a&&0>=f)return o(null);for(;t>f&&!l;){var n=c();if(null===n)return a=!0,void(0>=f&&o(null));f+=1,r(e[n],n,u(function(n){f-=1,n?(o(n),l=!0):s()}))}}()}}function d(n){return function(t,e,r){return n(C.eachOf,t,e,r)}}function g(n){return function(t,e,r,u){return n(v(e),t,r,u)}}function k(n){return function(t,e,r){return n(C.eachOfSeries,t,e,r)}}function b(t,e,r,u){u=i(u||n),e=e||[];var c=o(e)?[]:{};t(e,function(n,t,e){r(n,function(n,r){c[t]=r,e(n)})},function(n){u(n,c)})}function w(n,t,e,r){var u=[];n(t,function(n,t,r){e(n,function(e){e&&u.push({index:t,value:n}),r()})},function(){r(a(u.sort(function(n,t){return n.index-t.index}),function(n){return n.value}))})}function O(n,t,e,r){w(n,t,function(n,t){e(n,function(n){t(!n)})},r)}function S(n,t,e){return function(r,u,i,o){function c(){o&&o(e(!1,void 0))}function a(n,r,u){return o?void i(n,function(r){o&&t(r)&&(o(e(!0,n)),o=i=!1),u()}):u()}arguments.length>3?n(r,u,a,c):(o=i,i=u,n(r,a,c))}}function E(n,t){return t}function L(t,e,r){r=r||n;var u=o(e)?[]:{};t(e,function(n,t,e){n(m(function(n,r){r.length<=1&&(r=r[0]),u[t]=r,e(n)}))},function(n){r(n,u)})}function I(n,t,e,r){var u=[];n(t,function(n,t,r){e(n,function(n,t){u=u.concat(t||[]),r(n)})},function(n){r(n,u)})}function x(t,e,r){function i(t,e,r,u){if(null!=u&&"function"!=typeof u)throw new Error("task callback must be a function");return t.started=!0,M(e)||(e=[e]),0===e.length&&t.idle()?C.setImmediate(function(){t.drain()}):(c(e,function(e){var i={data:e,callback:u||n};r?t.tasks.unshift(i):t.tasks.push(i),t.tasks.length===t.concurrency&&t.saturated()}),void C.setImmediate(t.process))}function o(n,t){return function(){f-=1;var e=!1,r=arguments;c(t,function(n){c(l,function(t,r){t!==n||e||(l.splice(r,1),e=!0)}),n.callback.apply(n,r)}),n.tasks.length+f===0&&n.drain(),n.process()}}if(null==e)e=1;else if(0===e)throw new Error("Concurrency must not be zero");var f=0,l=[],s={tasks:[],concurrency:e,payload:r,saturated:n,empty:n,drain:n,started:!1,paused:!1,push:function(n,t){i(s,n,!1,t)},kill:function(){s.drain=n,s.tasks=[]},unshift:function(n,t){i(s,n,!0,t)},process:function(){if(!s.paused&&f=t;t++)C.setImmediate(s.process)}}};return s}function j(n){return m(function(t,e){t.apply(null,e.concat([m(function(t,e){"object"==typeof console&&(t?console.error&&console.error(t):console[n]&&c(e,function(t){console[n](t)}))})]))})}function A(n){return function(t,e,r){n(f(t),e,r)}}function T(n){return m(function(t,e){var r=m(function(e){var r=this,u=e.pop();return n(t,function(n,t,u){n.apply(r,e.concat([u]))},u)});return e.length?r.apply(this,e):r})}function z(n){return m(function(t){var e=t.pop();t.push(function(){var n=arguments;r?C.setImmediate(function(){e.apply(null,n)}):e.apply(null,n)});var r=!0;n.apply(this,t),r=!1})}var q,C={},P="object"==typeof self&&self.self===self&&self||"object"==typeof global&&global.global===global&&global||this;null!=P&&(q=P.async),C.noConflict=function(){return P.async=q,C};var H=Object.prototype.toString,M=Array.isArray||function(n){return"[object Array]"===H.call(n)},U=function(n){var t=typeof n;return"function"===t||"object"===t&&!!n},W=Object.keys||function(n){var t=[];for(var e in n)n.hasOwnProperty(e)&&t.push(e);return t},B="function"==typeof setImmediate&&setImmediate,D=B?function(n){B(n)}:function(n){setTimeout(n,0)};"object"==typeof process&&"function"==typeof process.nextTick?C.nextTick=process.nextTick:C.nextTick=D,C.setImmediate=B?D:C.nextTick,C.forEach=C.each=function(n,t,e){return C.eachOf(n,y(t),e)},C.forEachSeries=C.eachSeries=function(n,t,e){return C.eachOfSeries(n,y(t),e)},C.forEachLimit=C.eachLimit=function(n,t,e,r){return v(t)(n,y(e),r)},C.forEachOf=C.eachOf=function(t,e,r){function o(n){f--,n?r(n):null===c&&0>=f&&r(null)}r=i(r||n),t=t||[];for(var c,a=h(t),f=0;null!=(c=a());)f+=1,e(t[c],c,u(o));0===f&&r(null)},C.forEachOfSeries=C.eachOfSeries=function(t,e,r){function o(){var n=!0;return null===a?r(null):(e(t[a],a,u(function(t){if(t)r(t);else{if(a=c(),null===a)return r(null);n?C.setImmediate(o):o()}})),void(n=!1))}r=i(r||n),t=t||[];var c=h(t),a=c();o()},C.forEachOfLimit=C.eachOfLimit=function(n,t,e,r){v(t)(n,e,r)},C.map=d(b),C.mapSeries=k(b),C.mapLimit=g(b),C.inject=C.foldl=C.reduce=function(n,t,e,r){C.eachOfSeries(n,function(n,r,u){e(t,n,function(n,e){t=e,u(n)})},function(n){r(n,t)})},C.foldr=C.reduceRight=function(n,e,r,u){var i=a(n,t).reverse();C.reduce(i,e,r,u)},C.transform=function(n,t,e,r){3===arguments.length&&(r=e,e=t,t=M(n)?[]:{}),C.eachOf(n,function(n,r,u){e(t,n,r,u)},function(n){r(n,t)})},C.select=C.filter=d(w),C.selectLimit=C.filterLimit=g(w),C.selectSeries=C.filterSeries=k(w),C.reject=d(O),C.rejectLimit=g(O),C.rejectSeries=k(O),C.any=C.some=S(C.eachOf,e,t),C.someLimit=S(C.eachOfLimit,e,t),C.all=C.every=S(C.eachOf,r,r),C.everyLimit=S(C.eachOfLimit,r,r),C.detect=S(C.eachOf,t,E),C.detectSeries=S(C.eachOfSeries,t,E),C.detectLimit=S(C.eachOfLimit,t,E),C.sortBy=function(n,t,e){function r(n,t){var e=n.criteria,r=t.criteria;return r>e?-1:e>r?1:0}C.map(n,function(n,e){t(n,function(t,r){t?e(t):e(null,{value:n,criteria:r})})},function(n,t){return n?e(n):void e(null,a(t.sort(r),function(n){return n.value}))})},C.auto=function(t,e,r){function u(n){d.unshift(n)}function o(n){var t=p(d,n);t>=0&&d.splice(t,1)}function a(){h--,c(d.slice(0),function(n){n()})}r||(r=e,e=null),r=i(r||n);var f=W(t),h=f.length;if(!h)return r(null);e||(e=h);var y={},v=0,d=[];u(function(){h||r(null,y)}),c(f,function(n){function i(){return e>v&&l(g,function(n,t){return n&&y.hasOwnProperty(t)},!0)&&!y.hasOwnProperty(n)}function c(){i()&&(v++,o(c),h[h.length-1](d,y))}for(var f,h=M(t[n])?t[n]:[t[n]],d=m(function(t,e){if(v--,e.length<=1&&(e=e[0]),t){var u={};s(y,function(n,t){u[t]=n}),u[n]=e,r(t,u)}else y[n]=e,C.setImmediate(a)}),g=h.slice(0,h.length-1),k=g.length;k--;){if(!(f=t[g[k]]))throw new Error("Has inexistant dependency");if(M(f)&&p(f,n)>=0)throw new Error("Has cyclic dependencies")}i()?(v++,h[h.length-1](d,y)):u(c)})},C.retry=function(n,t,e){function r(n,t){if("number"==typeof t)n.times=parseInt(t,10)||i;else{if("object"!=typeof t)throw new Error("Unsupported argument type for 'times': "+typeof t);n.times=parseInt(t.times,10)||i,n.interval=parseInt(t.interval,10)||o}}function u(n,t){function e(n,e){return function(r){n(function(n,t){r(!n||e,{err:n,result:t})},t)}}function r(n){return function(t){setTimeout(function(){t(null)},n)}}for(;a.times;){var u=!(a.times-=1);c.push(e(a.task,u)),!u&&a.interval>0&&c.push(r(a.interval))}C.series(c,function(t,e){e=e[e.length-1],(n||a.callback)(e.err,e.result)})}var i=5,o=0,c=[],a={times:i,interval:o},f=arguments.length;if(1>f||f>3)throw new Error("Invalid arguments - must be either (task), (task, callback), (times, task) or (times, task, callback)");return 2>=f&&"function"==typeof n&&(e=t,t=n),"function"!=typeof n&&r(a,n),a.callback=e,a.task=t,a.callback?u():u},C.waterfall=function(t,e){function r(n){return m(function(t,u){if(t)e.apply(null,[t].concat(u));else{var i=n.next();i?u.push(r(i)):u.push(e),z(n).apply(null,u)}})}if(e=i(e||n),!M(t)){var u=new Error("First argument to waterfall must be an array of functions");return e(u)}return t.length?void r(C.iterator(t))():e()},C.parallel=function(n,t){L(C.eachOf,n,t)},C.parallelLimit=function(n,t,e){L(v(t),n,e)},C.series=function(n,t){L(C.eachOfSeries,n,t)},C.iterator=function(n){function t(e){function r(){return n.length&&n[e].apply(null,arguments),r.next()}return r.next=function(){return er;){var i=r+(u-r+1>>>1);e(t,n[i])>=0?r=i:u=i-1}return r}function i(t,e,i,o){if(null!=o&&"function"!=typeof o)throw new Error("task callback must be a function");return t.started=!0,M(e)||(e=[e]),0===e.length?C.setImmediate(function(){t.drain()}):void c(e,function(e){var c={data:e,priority:i,callback:"function"==typeof o?o:n};t.tasks.splice(u(t.tasks,c,r)+1,0,c),t.tasks.length===t.concurrency&&t.saturated(),C.setImmediate(t.process)})}var o=C.queue(t,e);return o.push=function(n,t,e){i(o,n,t,e)},delete o.unshift,o},C.cargo=function(n,t){return x(n,1,t)},C.log=j("log"),C.dir=j("dir"),C.memoize=function(n,e){var r={},u={};e=e||t;var i=m(function(t){var i=t.pop(),o=e.apply(null,t);o in r?C.setImmediate(function(){i.apply(null,r[o])}):o in u?u[o].push(i):(u[o]=[i],n.apply(null,t.concat([m(function(n){r[o]=n;var t=u[o];delete u[o];for(var e=0,i=t.length;i>e;e++)t[e].apply(null,n)})])))});return i.memo=r,i.unmemoized=n,i},C.unmemoize=function(n){return function(){return(n.unmemoized||n).apply(null,arguments)}},C.times=A(C.map),C.timesSeries=A(C.mapSeries),C.timesLimit=function(n,t,e,r){return C.mapLimit(f(n),t,e,r)},C.seq=function(){var t=arguments;return m(function(e){var r=this,u=e[e.length-1];"function"==typeof u?e.pop():u=n,C.reduce(t,e,function(n,t,e){t.apply(r,n.concat([m(function(n,t){e(n,t)})]))},function(n,t){u.apply(r,[n].concat(t))})})},C.compose=function(){return C.seq.apply(null,Array.prototype.reverse.call(arguments))},C.applyEach=T(C.eachOf),C.applyEachSeries=T(C.eachOfSeries),C.forever=function(t,e){function r(n){return n?i(n):void o(r)}var i=u(e||n),o=z(t);r()},C.ensureAsync=z,C.constant=m(function(n){var t=[null].concat(n);return function(n){return n.apply(this,t)}}),C.wrapSync=C.asyncify=function(n){return m(function(t){var e,r=t.pop();try{e=n.apply(this,t)}catch(u){return r(u)}U(e)&&"function"==typeof e.then?e.then(function(n){r(null,n)})["catch"](function(n){r(n.message?n:new Error(n))}):r(null,e)})},"object"==typeof module&&module.exports?module.exports=C:"function"==typeof define&&define.amd?define([],function(){return C}):P.async=C}(); }).call(this,require('_process'),typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) },{"_process":14}],13:[function(require,module,exports){ exports.load=function(e,t){var n=new XMLHttpRequest;n.onreadystatechange=function(){if(4===n.readyState)if(n.status>=200&&n.status<300)t(null,n.responseText);else{var r=new Error("failed to request file '"+e+"'");r.errno=34,t(r)}};try{n.open("GET",e,!0),n.send(null)}catch(r){t(r)}}; },{}],14:[function(require,module,exports){ function cleanUpNextTick(){draining=!1,currentQueue.length?queue=currentQueue.concat(queue):queueIndex=-1,queue.length&&drainQueue()}function drainQueue(){if(!draining){var e=setTimeout(cleanUpNextTick);draining=!0;for(var n=queue.length;n;){for(currentQueue=queue,queue=[];++queueIndex1)for(var r=1;r