// MIT License
//
// Copyright (c) 2024 Joao Chrisostomo, Kacper Michajłow
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//!DESC ArtCNN C4F16 (Conv2D)
//!COMPUTE 24 32 12 16
//!HOOK LUMA
//!BIND LUMA
//!SAVE conv2d
//!WIDTH LUMA.w 2.0 *
//!HEIGHT LUMA.h 2.0 *
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *
// First pass: 3x3 convolution that expands the single luma plane into 16
// feature values per input pixel. The 16 values are kept as four 4-component
// vectors (result0..result3) and written as a 2x2 block of texels, which is
// why the saved texture is declared at twice the LUMA width and height.
//
// The header directives above must each stay on their own line, and the
// preprocessor directives below must each start a line; if they are folded
// onto one line the leading comment marker swallows the code that follows.

// Use 16-bit float types when the extension is available, 32-bit otherwise.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#   define V4 f16vec4
#   define M4 f16mat4
#   define F float16_t
#else
#   define V4 vec4
#   define M4 mat4
#   define F float
#endif

const ivec2 ksize = ivec2(3, 3);                 // convolution window
const ivec2 offset = ksize / 2;                  // window radius (1, 1)
const ivec2 wg_size = ivec2(gl_WorkGroupSize);   // invocations per work group
const ivec2 isize = wg_size + ksize - 1;         // tile size: work group plus a one-sample border

// Work-group tile of input samples shared by all invocations.
shared F inp[1][isize.y][isize.x];

void hook() {
    const uvec2 local_xy = gl_LocalInvocationID.xy;
    ivec2 base = ivec2(gl_WorkGroupID) * wg_size;

    // Fill the shared tile cooperatively: each invocation starts at its own
    // local id and advances by the work-group size, so the border samples
    // beyond wg_size are picked up by a second loop iteration.
    // NOTE(review): coordinates are not clamped, so invocations at the image
    // edges fetch outside the texture; what such a fetch returns depends on
    // the graphics API/driver -- confirm this is the intended padding.
    for (uint y = local_xy.y; y < isize.y; y += wg_size.y) {
        for (uint x = local_xy.x; x < isize.x; x += wg_size.x) {
            const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(1, 1);
            inp[0][y][x] = F(LUMA_mul * texelFetch(LUMA_raw, input_base + ivec2(0, 0), 0).x);
        }
    }

    // Wait until every invocation of the work group has written its samples.
    barrier();

    // Accumulators start from per-channel constants
    // (presumably the layer's bias terms -- confirm against the model export).
    V4 result0 = V4(-0.0027198044, -0.013629392, -0.015712878, -0.050803013);
    V4 result1 = V4(-0.02707489, -0.0062177293, 0.0026368732, -0.0029379292);
    V4 result2 = V4(0.03127001, -0.0039273943, -0.0040966137, -0.0016518718);
    V4 result3 = V4(0.0028380281, 0.00058883557, 0.013085538, -0.058857743);

    // 3x3 neighbourhood of this invocation; names are inp_<plane>_<x>_<y>.
    const F inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0];
    const F inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1];
    const F inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2];
    const F inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0];
    const F inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1];
    const F inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2];
    const F inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0];
    const F inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1];
    const F inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2];

    // Weighted sums; each group walks the window row by row.
    result0 += V4(-0.016452063, -0.1258466, 0.013886958, 0.036870774) * inp_0_0_0;
    result0 += V4(0.04311634, 0.15515013, 0.12190506, 0.12543218) * inp_0_1_0;
    result0 += V4(-0.0049624983, 0.1029244, -0.10124424, 0.06448426) * inp_0_2_0;
    result0 += V4(0.001886782, 0.06120591, 0.020384936, 0.16804346) * inp_0_0_1;
    result0 += V4(-0.04256893, -0.07616671, -0.37889892, 0.27856478) * inp_0_1_1;
    result0 += V4(-0.20398517, -0.12900643, 0.113083735, 0.11175711) * inp_0_2_1;
    result0 += V4(0.009553091, 0.13118562, -0.031063978, 0.09478131) * inp_0_0_2;
    result0 += V4(0.066157505, -0.114692695, 0.22418123, -0.009412468) * inp_0_1_2;
    result0 += V4(0.15508306, 0.011386595, 0.014014352, 0.09318008) * inp_0_2_2;

    result1 += V4(0.08046117, -0.07086712, -0.102300294, 0.014950261) * inp_0_0_0;
    result1 += V4(-0.06476857, -0.014190924, -0.017589286, -0.19119741) * inp_0_1_0;
    result1 += V4(0.05054515, 0.115604624, 0.06517106, 0.13799176) * inp_0_2_0;
    result1 += V4(-0.045681432, 0.08269155, 0.10319298, -0.026858954) * inp_0_0_1;
    result1 += V4(0.11229104, -0.17059296, 0.13794285, 0.18026339) * inp_0_1_1;
    result1 += V4(-0.1267971, 0.23877597, -0.18725446, -0.12132741) * inp_0_2_1;
    result1 += V4(0.05785694, -0.015154775, 0.026422592, 0.002328838) * inp_0_0_2;
    result1 += V4(0.07150728, -0.22784448, -0.12155527, 0.027110105) * inp_0_1_2;
    result1 += V4(-0.08247087, 0.06362491, 0.08973536, -0.02196324) * inp_0_2_2;

    result2 += V4(-0.06092033, 0.1256232, -0.11233013, -0.061837807) * inp_0_0_0;
    result2 += V4(0.08898802, -0.028417582, 0.15791786, -0.01610648) * inp_0_1_0;
    result2 += V4(0.06330266, -0.009340407, 0.017859828, -0.007937439) * inp_0_2_0;
    result2 += V4(-0.17722517, 0.31189576, 0.32109433, 0.18112311) * inp_0_0_1;
    result2 += V4(-0.2903746, -0.72364086, -0.3329427, -0.08360631) * inp_0_1_1;
    result2 += V4(0.14228302, 0.11720193, -0.056604996, -0.027815754) * inp_0_2_1;
    result2 += V4(0.035853237, 0.118430145, -0.12544365, -0.02719196) * inp_0_0_2;
    result2 += V4(0.20537417, 0.07353585, 0.10881828, 0.1451791) * inp_0_1_2;
    result2 += V4(-0.1517126, -0.010349405, 0.018765846, -0.09707698) * inp_0_2_2;

    result3 += V4(0.052764144, -0.10130216, 0.22795214, -0.09385554) * inp_0_0_0;
    result3 += V4(-0.16102873, 0.18050277, 0.36273104, 0.1743911) * inp_0_1_0;
    result3 += V4(0.008320275, -0.031096114, 0.06665433, 0.047147725) * inp_0_2_0;
    result3 += V4(0.039706435, -0.0059984834, 0.026533028, -0.19475575) * inp_0_0_1;
    result3 += V4(0.017116806, -0.1657458, -0.4245533, 0.011194904) * inp_0_1_1;
    result3 += V4(0.03566397, 0.1254953, -0.16895337, 0.20406392) * inp_0_2_1;
    result3 += V4(-0.0622524, 0.11329407, -0.052762877, -0.081980705) * inp_0_0_2;
    result3 += V4(0.08946176, -0.05226282, -0.15308078, -0.0015630769) * inp_0_1_2;
    result3 += V4(-0.018317576, -0.06487258, -0.012865839, 0.13352033) * inp_0_2_2;

    // Each invocation owns a 2x2 block of output texels, one per accumulator.
    const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2);
    imageStore(out_image, output_base + ivec2(0, 0), result0);
    imageStore(out_image, output_base + ivec2(1, 0), result1);
    imageStore(out_image, output_base + ivec2(0, 1), result2);
    imageStore(out_image, output_base + ivec2(1, 1), result3);
}

//!DESC ArtCNN C4F16 (Conv2D-1-ReLU)
//!COMPUTE 24 32 12 16
//!HOOK LUMA
//!BIND conv2d
//!SAVE conv2d_1
//!WIDTH LUMA.w 2.0 *
//!HEIGHT LUMA.h 2.0 *
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#   define V4 f16vec4
#   define M4 f16mat4
#   define F float16_t
#else
#   define V4 vec4
#   define M4 mat4
#   define F float
#endif
const ivec2 ksize = ivec2(3, 3);
const ivec2 offset = ksize / 2;
const ivec2 wg_size = ivec2(gl_WorkGroupSize);
const ivec2 isize = wg_size + ksize - 1;
shared V4 inp[4][isize.y][isize.x];
void hook() {
const uvec2 local_xy = gl_LocalInvocationID.xy;
ivec2 base = ivec2(gl_WorkGroupID) * wg_size;
for (uint y = local_xy.y; y < isize.y; y += wg_size.y) {
for (uint x = local_xy.x; x < isize.x; x += wg_size.x) {
const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2);
inp[0][y][x] = V4(conv2d_mul *
texelFetch(conv2d_raw, input_base + ivec2(0, 0), 0)); inp[1][y][x] = V4(conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(1, 0), 0)); inp[2][y][x] = V4(conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(0, 1), 0)); inp[3][y][x] = V4(conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(1, 1), 0)); } } barrier(); V4 result0 = V4(0.0035388642, 0.012181487, 0.06403471, 0.015263772); V4 result1 = V4(0.048399653, -0.0074684187, -0.0068150507, 0.016319986); V4 result2 = V4(0.015551343, 0.028928788, 0.0074838563, 0.012831508); V4 result3 = V4(0.015546932, 0.0018076884, 0.003934822, -0.027352111); const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0]; const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1]; const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2]; const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0]; const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1]; const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2]; const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0]; const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1]; const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.08193566, 0.39840552, -0.05933193, 0.029763913, 0.120781906, 0.13874644, 0.06285476, -0.031514462, -0.33653876, 0.23761259, -0.07119319, -0.08926567, 0.00014508692, -0.062654756, -0.13101529, 0.029373677) * inp_0_0_0; result0 += M4(-0.2686895, 0.10908275, 0.024864523, 0.028138349, 0.63595426, 0.20497061, -0.0053157317, 0.09559526, -0.6010695, -0.52804697, -0.08165241, -0.07418531, -0.36249816, 0.09965129, 0.043142296, 0.11102198) * inp_0_1_0; result0 += M4(0.32003805, -0.06369186, 0.110159315, -0.010411652, 0.14303263, -0.18286166, 0.17141442, -0.0774708, -0.2750923, 0.048123654, -0.042993017, 0.055669174, -0.08664134, -0.014772742, -0.09638268, -0.060853086) * inp_0_2_0; result0 += M4(-0.18638985, -0.10025375, -0.01221694, -0.18347955, -0.22381034, -0.15570909, 0.22322272, -0.18700717, 0.23769893, 0.14806393, 
-0.008849564, 0.0136317285, 0.03139464, 0.08378959, -0.012119499, 0.026419694) * inp_0_0_1; result0 += M4(-0.09444148, -0.16680825, -0.04191063, -0.11433058, -0.46190983, 0.24968515, 0.09260946, -0.039239544, -0.48942524, 0.65290695, 0.026454417, -0.3996515, 0.035578046, 0.03752698, 0.11413368, 0.12726994) * inp_0_1_1; result0 += M4(-0.3939464, 0.011404807, -0.08643197, -0.07001899, -0.4240259, -0.11423647, -0.06039618, -0.045650464, 0.18176849, 0.118614994, 0.07916771, -0.14000084, -0.04808409, 0.06989854, 0.06425597, 0.038330052) * inp_0_2_1; result0 += M4(0.088639356, 0.063344724, -0.0034504097, 0.2238218, -0.07661125, -0.10738507, -0.0092966715, 0.3010383, 0.31496724, -0.1979007, 0.05447602, -0.10734698, -0.053885452, -0.05719943, -0.113555916, -0.021155154) * inp_0_0_2; result0 += M4(0.015520851, 0.026369756, -0.04167262, 0.141523, 0.073253386, 0.084800765, -0.0676251, 0.5053454, 0.28553674, -0.15500426, -0.24915896, 0.9031177, -0.0045735966, -0.039383005, -0.060552903, 0.024590101) * inp_0_1_2; result0 += M4(-0.21189246, -0.050623517, 0.013824571, 0.054410357, 0.18887533, -0.2435286, -0.11421549, -0.16462444, 0.0728174, -0.09657451, 0.07730208, -0.07939918, 0.034992587, -0.07169108, 0.04370187, -0.0785741) * inp_0_2_2; result1 += M4(-0.08724563, -0.029360695, 0.065424606, 0.18016396, -0.066029646, 0.009851433, 0.044099204, 0.062524706, 0.075378865, -0.04025694, -0.00035143772, 0.02216928, 0.07692978, -0.07277084, -0.061999667, -0.12335496) * inp_0_0_0; result1 += M4(0.15146391, -0.045693725, 0.08794338, -0.07766357, -0.13882384, 0.11807377, 0.16390486, 0.060984742, -0.050909836, 0.011076402, -0.0069654104, -0.008235624, 0.13997374, -0.055829335, -0.11395829, -0.045706417) * inp_0_1_0; result1 += M4(0.11037505, 0.037779838, 0.041720603, -0.11670487, -0.10206224, 0.29574063, -0.017590221, -0.029810347, 0.08654991, 0.11194881, -0.09588386, 0.11034557, -0.11284499, -0.038090393, -0.060732067, 0.15201814) * inp_0_2_0; result1 += M4(-0.02798946, 0.2645396, 
-0.05080384, 0.39022708, -0.07972038, -0.03486675, 0.1624059, 0.1826741, -0.0305445, 0.3158071, 0.0032137304, 0.06960887, 0.061508473, -0.060662374, -0.03728802, -0.10974534) * inp_0_0_1; result1 += M4(0.13413881, -0.19690159, 0.25165883, -0.025285713, 0.19857527, -0.2321096, -0.056400586, -0.11435613, -0.1891792, 0.15364274, 0.95750797, -0.031228404, 0.17185828, 0.072887875, -0.11088333, -0.08581528) * inp_0_1_1; result1 += M4(0.09028881, -0.1780539, -0.04785461, -0.09430586, -0.044199556, -0.17495096, 0.2255327, -0.04418362, 0.42351684, -0.17086548, 0.108544886, 0.063870065, -0.20677198, 0.0918337, -0.031405777, 0.026987633) * inp_0_2_1; result1 += M4(0.03143219, -0.06746378, -0.077566706, -0.19247384, 0.16452655, -0.5051362, -0.16162011, -0.08145442, 0.05511902, 0.030584775, -0.071060166, 0.25326735, -0.016743116, -0.0618647, 0.1214063, -0.04310255) * inp_0_0_2; result1 += M4(0.13270794, -0.12859413, -0.13721833, -0.011795724, 0.11956431, 0.026687238, -0.52203315, -0.317287, -0.012819699, -0.45424682, -0.681156, -0.18422177, 0.07963337, 0.04038403, 0.086956374, 0.0706985) * inp_0_1_2; result1 += M4(0.2106671, -0.09430158, -0.077296756, 0.001207568, -0.019188594, -0.011531864, -0.046601746, -0.057146672, 0.0503799, -0.118843146, -0.124916956, -0.08522102, -0.14284284, -0.1193746, 0.06719082, 0.09910482) * inp_0_2_2; result2 += M4(-0.2244822, -0.082729764, -0.44512755, -0.77702785, 0.070779085, -0.06795795, 0.048184644, -0.41027415, -0.21277706, -0.20790118, 0.36475337, -0.31860444, 0.014606301, -0.08359864, 0.03596085, 0.0350377) * inp_0_0_0; result2 += M4(0.09447366, 0.12554698, 0.052572437, 0.004993876, 0.015338761, 0.008580391, 0.024699932, -0.18695894, 0.12486596, 0.16261359, 0.05223534, -0.49886906, 0.058645505, 0.11000217, -0.052211735, -0.10267668) * inp_0_1_0; result2 += M4(0.09253685, 0.32692894, -0.11326509, -0.2243793, 0.055343658, 0.062875904, 0.1316403, -0.37698555, -0.06615555, 0.2947396, -0.30346048, -0.20655632, -0.052567784, -0.17170146, 
-0.10338503, -0.028746855) * inp_0_2_0; result2 += M4(-0.12259729, -0.119727425, 0.593932, -0.10741678, -0.0150476275, -0.15803762, 0.43156016, -0.09719216, -0.07408, -0.12842028, -0.47880188, 0.41297135, 0.07315841, 0.08734928, -0.031542793, 0.0862864) * inp_0_0_1; result2 += M4(-0.14790195, 0.028560948, 0.14077131, 0.15227696, -0.11203044, 0.1193981, 0.16548304, 0.07721256, -0.48270524, -0.14609018, 0.13051052, 0.16035795, 0.22310275, 0.08613812, 0.12101891, 0.0693605) * inp_0_1_1; result2 += M4(-0.17909749, -0.024392027, 0.2227383, -0.057302292, -0.05009124, 0.30629346, -0.0658257, 0.10691373, -0.09005195, 0.26912916, -0.02555377, 0.18611579, 0.09474404, -0.046735127, 0.08628372, 0.14976539) * inp_0_2_1; result2 += M4(-0.12682188, -0.033667836, -0.21387348, 0.11724357, 0.11241762, -0.10776379, -0.45622072, 0.36206225, 0.034357004, 0.0274606, 0.53226703, -0.054120746, -0.07173099, -0.019781826, -0.0004506044, -0.090236515) * inp_0_0_2; result2 += M4(0.10396443, -0.05090273, 0.030880392, 0.13643251, 0.4523865, 0.081858695, -0.08280347, 0.18518096, 0.56079537, 0.007042201, -0.012337641, 0.0017952896, -0.19653215, 0.10902694, -0.0235898, -0.15462936) * inp_0_1_2; result2 += M4(0.011347261, 0.16956125, -0.07598024, -0.010642945, -0.06959051, 0.097124174, -0.10528735, 0.023413276, 0.011072511, -0.11380183, -0.1546189, -0.018206634, -0.072076365, -0.09334556, -0.03711523, -0.061668754) * inp_0_2_2; result3 += M4(0.20372537, 0.14578828, 0.20127167, -0.16066748, 0.09756872, -0.10025203, 0.18399055, 0.057970647, 0.03679455, -0.13061528, -0.13194819, -0.017791305, -0.027707439, -0.037850592, -0.0058791186, 0.15898317) * inp_0_0_0; result3 += M4(-0.13326374, -0.048175838, -0.1211976, -0.047573894, 0.029398272, 0.038163513, 0.06364081, 0.09756068, 0.112353444, -0.06523655, -0.3399193, 0.007071915, -0.0023307495, 0.06515439, -0.008210844, -0.048217848) * inp_0_1_0; result3 += M4(-0.21247563, 0.08523739, 0.04721741, 0.06442855, -0.051469017, -0.092593156, 0.047065668, 
0.02163933, -0.20487121, -0.2948929, 0.14564759, 0.072847456, 0.009143655, 0.061654102, -0.008843048, -0.095890515) * inp_0_2_0; result3 += M4(0.22976227, -0.0034405778, -0.33477315, 0.12545134, 0.17131741, -0.11558586, 0.03167751, 0.14242904, 0.1061916, -0.18220623, 0.07726241, -0.48172006, -0.14746118, -0.11912247, 0.030638449, 0.10207886) * inp_0_0_1; result3 += M4(-0.06655945, -0.019838296, -0.038446557, -0.047108516, -0.30915856, 0.07429464, -0.20438327, 0.14802341, 0.47137445, 0.024445575, 0.42510328, 0.242163, -0.021400312, 0.107167594, -0.056022163, 0.020425789) * inp_0_1_1; result3 += M4(-0.0074782637, 0.054402016, -0.121972315, 0.041444067, -0.35059676, -0.15080321, -0.29118222, -0.016145391, -0.3576716, -0.3522069, 0.043113176, 0.010357731, 0.14414723, -0.113717005, -0.09415485, -0.028949017) * inp_0_2_1; result3 += M4(0.03853427, 0.03176998, -0.07919849, 0.018467484, 0.03600878, 0.56193393, 0.13832507, 0.07699603, -0.15650001, -0.07601528, 0.16394944, -0.026312442, -0.07145015, -0.036025092, -0.04234946, 0.041831832) * inp_0_0_2; result3 += M4(-0.016900353, -0.28659436, 0.011943033, -0.07257182, 0.014030349, 0.035322875, -0.5988642, -0.14678574, -0.0044690417, 0.8888096, -0.23830332, 0.10747964, 0.02044547, 0.1279399, 0.0006579104, -0.011562968) * inp_0_1_2; result3 += M4(-0.089198224, 0.16423313, -0.29691276, 0.07104331, 0.13190354, 0.0776772, 0.08241301, -0.14865209, -0.08948346, 0.1620836, -0.32379845, 0.0013747211, 0.04707559, 0.0037285136, -0.015344385, -0.014232466) * inp_0_2_2; const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0]; const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1]; const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2]; const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0]; const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1]; const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2]; const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0]; const V4 inp_1_1_2 = inp[1][local_xy.y + 
2][local_xy.x + 1]; const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.85313725, -0.02435101, -0.06857936, 0.04679383, -0.2585699, 0.081624314, 0.24544875, -0.027587967, -0.7746935, 0.5622293, -0.09342312, -0.03688094, -0.34380323, 0.72687024, -0.048763245, -0.003689941) * inp_1_0_0; result0 += M4(-0.35174468, 0.117986694, 0.08605906, 0.028830849, 0.64237136, -0.3769583, -0.024089484, -0.23862994, 0.8207976, -0.092174746, 0.04297148, -0.051756054, -0.29739568, -0.11195339, -0.14723086, 0.03295132) * inp_1_1_0; result0 += M4(0.2954745, 0.21712531, -0.067807004, 0.029517183, 0.16259618, 0.014441043, 0.0127598, 0.029936556, -0.4073981, -0.48001292, 0.10198077, 0.17044066, 0.058579385, -0.16940872, -0.22117646, 0.0061707636) * inp_1_2_0; result0 += M4(-0.37001958, -0.071665555, 0.10686925, -0.10744776, -0.11030434, -0.016945168, -0.14443113, 0.039766748, -0.041175894, -0.31643036, -0.009723037, 0.11083293, -0.35056636, 0.50091743, 0.21538898, 0.13894936) * inp_1_0_1; result0 += M4(0.18551482, -0.38274184, 0.1090545, 0.09133414, -0.100317314, 0.26533842, 0.028536757, 0.07844555, 0.006583591, 0.27766275, 0.1254183, -0.0061147013, 0.8617392, -0.7546585, -0.23212102, 0.2814898) * inp_1_1_1; result0 += M4(-0.17533015, 0.056518923, -0.11886858, -0.17416163, -0.21810603, -0.041996296, -0.13190891, 0.10978519, -0.13276191, 0.29193908, -0.10255887, -0.15144268, 0.39411002, 0.5364633, -0.07876772, 0.12273716) * inp_1_2_1; result0 += M4(0.025226103, 0.14986138, -0.05295039, 0.12537077, 0.10823799, -0.15962936, -0.04351816, 0.06994498, 0.3133468, 0.14449136, -0.20122978, -0.04178362, -0.025069045, 0.052850995, -0.31893167, 0.31568733) * inp_1_0_2; result0 += M4(0.10307432, -0.03794696, 0.11638587, -0.15721007, 0.0520565, -0.013159431, 0.0033045416, 0.16645943, -0.04602354, 0.07579619, 0.119559675, 0.10222658, -0.076188296, -0.03417925, 0.35578036, -0.90802264) * inp_1_1_2; result0 += M4(-0.13523121, 0.14683475, -0.24159884, 0.011003018, 0.15668543, 
-0.012923501, -0.049195066, -0.15511718, 0.21970461, -0.40896636, 0.14699629, -0.10037102, -0.011684425, -0.44974482, 0.075924896, -0.13819264) * inp_1_2_2; result1 += M4(-0.32133275, -0.1632966, 0.016118964, -0.004102593, 0.32084933, 0.41906273, 0.08610651, 0.17084497, 0.13008702, 0.09139067, 0.21345861, -0.12968206, 0.008954712, -0.07455759, -0.25342295, -0.13596231) * inp_1_0_0; result1 += M4(0.110927545, 0.026440676, -0.057138063, -0.026707454, -0.23099862, 0.10929439, 0.18417938, -0.08928301, 0.10867742, -0.062411074, 0.17694812, 0.27520618, 0.14665566, -0.19123065, -0.0028814063, 0.020384334) * inp_1_1_0; result1 += M4(0.13942137, 0.0722548, 0.085695975, -0.0482584, -0.058018155, -0.06788848, 0.12553318, -0.017362285, -0.28463066, 0.2797977, -0.1744807, 0.07061992, 0.19873491, -0.16651899, -0.06323969, -0.02578639) * inp_1_2_0; result1 += M4(-0.025543625, -0.61818314, 0.05540194, -0.36864513, 0.13506746, 0.45087427, 0.02561734, 0.4824854, -0.12235725, -0.13946341, -0.06030317, 0.14183283, -0.25570124, -0.47644913, -0.019941723, -0.45477942) * inp_1_0_1; result1 += M4(0.3645578, 0.068593346, -0.1665043, 0.16704038, -0.2967851, 0.026300274, -0.13397226, -0.32232535, 0.13193616, -0.22311744, -0.41460353, -0.28414023, 0.4073843, 0.07373801, -0.22364068, 0.12431164) * inp_1_1_1; result1 += M4(0.25017032, 0.070454225, 0.13813335, 0.019375844, -0.096448295, 0.072787635, 0.0041182754, -0.15815192, -0.2322587, -0.19746271, 0.13508892, 0.14120723, 0.11005683, 0.08688148, -0.15526778, -0.075340524) * inp_1_2_1; result1 += M4(-0.14920484, -0.48791856, -0.023223, -0.3027848, 0.3378221, 0.4793663, -0.14946601, 0.9567044, -0.18527344, -0.119982645, -0.08596696, -0.17026097, 0.23330817, 0.036837753, -0.08102003, 0.29802063) * inp_1_0_2; result1 += M4(0.10104728, 0.017842278, 0.20009801, 0.2362712, -0.16312838, -0.04045318, -0.3592395, -0.20366094, 0.2688829, 0.30382335, 0.08821922, -0.27768466, -0.07204707, 0.67338127, 0.9684179, 0.26206905) * inp_1_1_2; result1 += 
M4(0.20085827, -0.1115579, -0.20676771, -0.14104019, -0.25027877, 0.13877366, -0.032097857, 0.011562554, -0.06354945, 0.1315248, 0.17668413, 0.061860748, -0.24132724, -0.11776977, 0.18664834, 0.05808953) * inp_1_2_2; result2 += M4(-0.072219275, 0.024704212, 0.6167852, -0.5133586, 0.23397118, 0.12476223, -0.24312721, 0.7445347, -0.19348057, -0.09359127, -0.8927323, -0.27338013, -0.08031656, -0.08860313, 0.32231623, -0.64860487) * inp_1_0_0; result2 += M4(-0.045371484, 0.12290978, 0.19355193, -0.03915953, -0.10403266, -0.26496485, 0.008814081, -0.16285874, -0.18879507, 0.1972924, 0.12899448, 0.15576236, -0.21710408, -0.008091758, 0.0434765, -0.1066743) * inp_1_1_0; result2 += M4(-0.002016541, -0.0006886421, 0.33393008, -0.039007384, 0.020184148, -0.0028074265, 0.13437714, 0.09344798, 0.20083073, -0.29304448, -0.07516481, 0.16750409, 0.067934446, 0.062371586, -0.09625756, 0.63305384) * inp_1_2_0; result2 += M4(-0.38875118, -0.0530316, -0.2627638, -0.37096834, 0.28227875, 0.057058726, -0.0007648176, 0.50170916, -0.014582225, -0.08303465, 1.2882401, -0.14439599, -0.5612461, -0.12507576, 0.7443172, 0.2275837) * inp_1_0_1; result2 += M4(-0.06241773, 0.23984735, -0.38247794, -0.193146, -0.023529753, -0.26548344, -0.0008831312, 0.054739162, 0.50808966, 0.04970531, 0.28555724, -0.07511468, 0.21564315, 0.14087324, -0.31117678, -0.52685535) * inp_1_1_1; result2 += M4(-0.067698725, -0.12012657, -0.18702506, 0.012525038, 0.11423368, 0.19669291, 0.0687259, -0.08449188, 0.114131555, 0.08787378, 0.027727695, -0.08600934, -0.1309175, 0.026612828, -0.078773305, 0.4300998) * inp_1_2_1; result2 += M4(-0.07888591, -0.19316435, 0.07030573, -0.23898849, 0.30641487, -0.050587706, -0.067026295, 0.50440204, -0.080069296, -0.09858528, -1.2006693, 0.22419576, 0.69720376, -0.4077811, -0.82145584, -0.3318518) * inp_1_0_2; result2 += M4(0.253429, 0.034562856, -0.043068126, 0.15730365, 0.18888126, -0.21013, -0.13781698, 0.21707365, -0.19726713, 0.027488727, 0.18039607, 0.11479809, -0.41453183, 
0.31391656, 0.0070650936, 0.076380774) * inp_1_1_2; result2 += M4(0.003429857, -0.018602068, -0.066684365, 0.11482386, -0.036528666, -0.029451601, -0.052084677, -0.035276096, -0.18110622, 0.20486076, 0.20484242, -0.0025528024, -0.19367853, 0.24121977, 0.025350042, 0.10913147) * inp_1_2_2; result3 += M4(-0.0023735664, 0.15035935, 0.12993571, 0.20475487, 0.0019596012, -0.09204715, -0.13434508, -0.2821397, -0.05906947, 0.21287656, 0.30251858, -0.05017008, 0.03126201, 0.002017435, -0.50526565, 0.17320465) * inp_1_0_0; result3 += M4(0.099262066, 0.12742217, -0.16422229, 0.03186198, 0.12362506, -0.14565836, -0.01895703, 0.17308953, 0.18997124, -0.008680871, 0.5007363, -0.06796606, 0.11387236, 0.0110532595, 0.027245427, -0.02858506) * inp_1_1_0; result3 += M4(-0.1043227, -0.009697204, -0.14678726, 0.025163496, -0.195235, -0.03975564, 0.020124005, 0.036363356, 0.2044253, -0.08100828, -0.18540438, 0.024970459, -0.06662393, -0.24664907, 0.1844767, 0.039760955) * inp_1_2_0; result3 += M4(0.03556496, 0.40731633, -0.009378648, 0.35368907, 0.054531943, -0.25538638, 0.23750697, -0.7583463, 0.19742353, -0.098816276, -0.41828468, 0.54874414, -0.07079365, 0.020870816, -0.64631456, 0.61903435) * inp_1_0_1; result3 += M4(-0.3990005, 0.00885162, -0.3252981, -0.072892606, 0.5868726, 0.062424455, 0.12871675, 0.32273144, -0.5357227, 0.08173042, -0.20361531, -0.15989196, -0.7643352, -0.024472103, -0.46516174, -0.58503497) * inp_1_1_1; result3 += M4(0.1232718, 0.06953096, 0.17959209, 0.023106743, -0.18714033, -0.09933013, -0.032718066, 0.070490085, 0.00089770433, -0.053541496, -0.09406766, -0.0057063233, 0.38167843, -0.14396475, 0.5196705, -0.16198893) * inp_1_2_1; result3 += M4(0.0140656475, 0.21922494, -0.056840483, -0.07001231, -0.120163955, -0.22476105, 0.072396405, -0.06738373, -0.09434732, 0.123775706, 0.09688478, -0.159715, 0.0009263281, 0.38566205, 0.1203647, -0.082159035) * inp_1_0_2; result3 += M4(-0.058063142, -0.13751893, -0.012188545, -0.10500652, 0.17686076, 0.042373944, 
-0.23056524, 0.10845812, -0.019570937, -0.19659866, -0.14402579, -0.13492614, 0.26653746, -0.038438268, 0.6654084, 0.13422534) * inp_1_1_2; result3 += M4(0.051074985, 0.01658446, 0.27970675, 0.06717866, -0.029654497, -0.041104205, -0.031459272, 0.02453557, 0.14207277, 0.118784904, 0.08530623, 0.045611463, -0.036815416, 0.44440648, 0.41889176, 0.040619373) * inp_1_2_2; const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0]; const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1]; const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2]; const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0]; const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1]; const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2]; const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0]; const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1]; const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.041046668, -0.08714282, -0.073594816, 0.010970952, -0.1882758, 0.27579176, 0.03536449, -0.08542214, 0.3048594, 0.4530526, 0.0013206154, -0.028139513, 0.8478168, 0.23004742, 0.0076424624, -0.005963699) * inp_2_0_0; result0 += M4(-0.675687, -0.18131071, -0.11496222, 0.04182087, -0.031031555, -0.46466836, -0.06559829, -0.03957504, 0.044670437, -0.38225058, 0.06304581, 0.040416148, -1.1025981, -0.18210213, 0.047893796, 0.09789689) * inp_2_1_0; result0 += M4(-0.25182498, 0.11422446, 0.11699884, 0.001959186, -0.18557777, 0.25785574, 0.025503788, -0.026508162, -0.70701927, -0.19959773, -0.16261607, 0.044826515, 0.4344687, 0.07016805, 0.06883519, 0.01511665) * inp_2_2_0; result0 += M4(0.066674605, 0.19236362, -0.0785282, -0.05095323, 0.18426083, -0.029129753, -0.10607958, -0.026322767, 0.08938068, -0.46208933, 0.15560026, -0.15441328, -0.20653039, -0.031048529, 0.18270804, -0.052175336) * inp_2_0_1; result0 += M4(-0.17021075, 0.44645318, 0.064337924, -0.35476676, -0.06049966, 0.14457396, 0.23315272, -0.61046785, -0.16183764, 0.7003692, 0.15736891, 
-0.4137998, 0.23982742, 0.24342622, -0.20881954, -0.21306111) * inp_2_1_1; result0 += M4(-0.12433247, 0.17513616, 0.088211484, -0.034301586, 0.17206079, -0.12319476, -0.0711395, -0.09247507, 0.017651413, -0.17906149, -0.119941995, 0.22451276, -0.047592435, 0.09213579, -0.062360976, -0.013997644) * inp_2_2_1; result0 += M4(0.18996708, 0.01084998, -0.26635185, 0.122841895, 0.010606524, -0.027477054, 0.019549975, 0.03288411, -0.012691764, 0.051538102, -0.046197236, 0.06947832, -0.11301016, 0.09755053, -0.07446157, 0.1486752) * inp_2_0_2; result0 += M4(-0.36824858, 0.05843571, -0.2618618, 0.7284761, -0.21758777, 0.15616763, 0.1021092, 0.39711222, 0.033237204, 0.023642745, -0.21918292, 0.5568767, 0.13701046, -0.25721508, 0.082974575, 0.0041465224) * inp_2_1_2; result0 += M4(0.09509355, -0.15123545, -0.034223463, -0.04543954, -0.079144716, 0.09071367, 0.09338266, 0.17455119, 0.1773881, -0.053871572, 0.037042413, -0.17526062, -0.070509546, -0.19783144, 0.011318544, -0.026450291) * inp_2_2_2; result1 += M4(-0.061877683, -0.12305839, -0.20263477, -0.11488986, 0.24543491, 0.32654366, 0.10685588, 0.11506586, -0.10795856, 0.03018991, 0.059933174, 0.0853006, -0.20264159, -0.043828856, -0.20878649, 0.14681698) * inp_2_0_0; result1 += M4(-0.099625036, 0.20434903, 0.07796133, 0.06736927, 0.07125978, 0.05099109, 0.22723898, -0.08742187, 0.11490085, -0.04678864, -0.13050777, -0.08809224, 0.10186953, 0.16114867, -0.1435795, -0.05920464) * inp_2_1_0; result1 += M4(0.07486631, 0.0044751903, -0.093036935, 0.027565863, -0.07470142, 0.17695644, -0.07382289, -0.044591907, -0.21034206, -0.25844106, 0.18035167, 0.059543654, -0.06836321, -0.14411914, 0.1548659, -0.03892513) * inp_2_2_0; result1 += M4(0.117224984, 0.17542, -0.00016663033, -0.20314355, 0.22289772, 0.30088758, -0.032968026, -0.15414216, -0.140072, 0.14610891, 0.03125237, 0.3250524, 0.03702276, 0.10557298, 0.08998342, -0.43446854) * inp_2_0_1; result1 += M4(-0.10302777, 0.4501756, 0.8351375, -0.017320178, -0.14834926, 
-0.13270089, 0.9023029, -0.0824228, 0.09006571, 0.11064118, 0.35334682, -0.30232978, 0.12792541, -0.0071462905, 0.4237898, 0.38528624) * inp_2_1_1; result1 += M4(0.36576793, -0.050644517, 0.04195809, 0.108811006, -0.33933258, -0.048364114, 0.15662737, -0.09251479, -0.20477468, 0.2541265, 0.17623655, 0.20086327, -0.111452036, -0.09789033, -0.15870745, 0.027849512) * inp_2_2_1; result1 += M4(0.20742778, -0.14416832, -0.13775797, 0.58929, 0.16418126, 0.49148947, -0.18096113, 0.68119305, -0.2531042, -0.17075492, -0.12018323, 0.12608007, 0.021732088, -0.37607798, 0.0010915091, 0.001743941) * inp_2_0_2; result1 += M4(-0.15006953, -0.28072268, -0.44455764, 0.027220342, 0.08871589, -0.19202746, -0.5691258, -0.22697172, 0.5984513, -0.021040866, -0.5640603, -0.69894254, 0.10230523, 0.37776676, -0.16448745, -0.34962857) * inp_2_1_2; result1 += M4(0.28793243, -0.27053738, 0.037647776, 0.10638764, -0.29987827, -0.10918723, -0.15849937, 0.032926988, -0.276323, -0.30320406, -0.06410051, 0.079027995, -0.1144192, 0.0686072, 0.13253035, 0.16962466) * inp_2_2_2; result2 += M4(-0.104575686, -0.04030638, 0.38205162, -0.1646444, 0.0070794793, 0.086107545, 0.09008801, -0.26022068, -0.08264631, 0.0025685597, 0.45367503, 0.036392443, -0.05385623, 0.0043640602, 0.88977754, -0.6094777) * inp_2_0_0; result2 += M4(0.17220014, -0.104757294, 0.28872848, -0.9140526, 0.07250322, -0.03486389, 0.0519809, -0.28910607, 0.13334726, -0.18156597, -0.48792294, 0.45706385, 0.114385955, 0.06414442, -0.40856388, 0.26286653) * inp_2_1_0; result2 += M4(0.06838794, 0.0037464432, -0.25175664, -0.10634569, -0.07736329, 0.04136653, -0.09635174, 0.042798392, -0.089459404, 0.1431562, -0.35529405, -0.58673966, -0.009117467, -0.08846847, -0.13101909, -0.0941622) * inp_2_2_0; result2 += M4(0.014403663, -0.09284215, -0.22324999, 0.5977427, -0.13145088, 0.045327898, -0.2159109, 0.58904, -0.14557627, 0.047140498, 0.37750348, 0.04514063, -0.07165266, 0.00403441, -0.20610464, -0.22565311) * inp_2_0_1; result2 += 
M4(-0.34951925, -0.20195192, 0.052249074, 0.004859058, -0.4837092, -0.56432015, -0.14240249, -0.089294374, -0.32763055, -0.6932184, 0.18841206, -0.10697703, -0.12905538, -0.012168624, 0.33363688, -0.23945247) * inp_2_1_1; result2 += M4(-0.19418816, -0.07137613, 0.016112942, -0.06583376, 0.010781973, 0.64119726, 0.2163896, 0.053818654, 0.19992511, 1.0749966, 0.37210134, -0.114605546, 0.12615307, 0.045438178, -0.09376032, 0.095737554) * inp_2_2_1; result2 += M4(0.1762724, -0.12271588, 0.32391888, 0.05773013, 0.24969184, 0.04550068, 0.14141926, 0.3324033, 0.10539948, -0.22236584, 0.063619636, 0.37273255, 0.17779888, -0.049786404, -0.2504475, 0.6257438) * inp_2_0_2; result2 += M4(0.20685196, -0.0106152035, -0.16097048, -0.011148921, 0.29720846, -0.2017546, -0.055421192, 0.14278509, 0.4991777, 0.1290137, -0.6009226, 0.12703991, 0.019506518, -0.09236414, -0.16110387, 0.15518974) * inp_2_1_2; result2 += M4(-0.051044676, -0.0802801, -0.16391027, -0.20129019, 0.07025838, -0.08146353, -0.13828063, 0.089377195, -0.26447552, -0.09614584, -0.004125906, -0.071870096, -0.24505717, 0.08601064, -0.007302386, 0.09454271) * inp_2_2_2; result3 += M4(0.08847961, -0.17847943, -0.69526863, -0.10000524, 0.022599213, -0.106843226, -0.32798478, -0.30478817, 0.116008244, 0.037003428, 0.00491019, 0.012982513, 0.14008689, -0.12109373, -0.114603855, 0.044905365) * inp_2_0_0; result3 += M4(0.17639944, -0.044862606, -0.17100137, -0.005114512, 0.2445843, -0.058113806, -0.18633145, 0.22324812, 0.07353126, -0.08040312, 0.46093383, -0.1881027, 0.020793755, 0.05615095, -0.12523803, -0.11003187) * inp_2_1_0; result3 += M4(0.015362869, 0.11070844, 0.18635881, 0.049795542, -0.18239588, -0.06503216, -0.004792109, 0.09085003, -0.24347839, 0.0014260579, -0.35534698, -0.01901228, -0.11597512, -0.14712879, -0.15724897, 0.08272095) * inp_2_2_0; result3 += M4(0.03369274, -0.09148342, -0.11649345, -0.8705563, 0.07332814, -0.07007546, -0.118292294, -0.5086767, -0.09837098, -0.086299285, 0.39339754, -0.34102294, 
-0.14108673, 0.3166342, -0.09888971, 0.17531094) * inp_2_0_1; result3 += M4(0.44183612, 0.03237536, 0.17856692, 0.11106832, 0.45943972, -0.4042934, 0.42762074, 0.78249615, 0.49094617, 0.08979736, 0.63991433, 1.1504385, -0.19337255, -0.028499793, 0.0188026, 0.13573536) * inp_2_1_1; result3 += M4(-0.07735287, -0.10227065, -0.11877216, 0.12290771, -0.8072676, 0.0721539, -0.1910864, -0.089940324, -0.9754005, 0.3272343, 0.04703304, -0.093180224, -0.30274558, 0.3410099, 0.17829552, -0.1283383) * inp_2_2_1; result3 += M4(-0.014644263, -0.16843987, 0.10283902, -0.23915698, 0.04944409, -0.28421712, 0.029477706, -0.20114663, 0.04234802, -0.10247694, 0.08606297, -0.00060528243, 0.16025454, -0.113197796, 0.32325915, -0.09360282) * inp_2_0_2; result3 += M4(0.15988652, 0.43129408, 0.003102661, 0.13530524, 0.20106629, 0.35276395, -0.2581822, 0.27712944, 0.092174165, 0.5860203, -0.6644479, -0.30149898, 0.28493616, 0.2263659, -0.103455596, -0.13312685) * inp_2_1_2; result3 += M4(0.05288319, 0.076428, -0.18710244, -0.093296796, -0.21877469, -0.15173075, -0.16969322, 0.05675821, 0.34267446, -0.25818372, -0.3879385, 0.057327595, 0.05748257, -0.44461763, 0.24332774, 0.18331102) * inp_2_2_2; const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0]; const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1]; const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2]; const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0]; const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1]; const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2]; const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0]; const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1]; const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.33035442, 0.008719723, 0.09506913, -0.05596779, 0.6257054, -0.4622823, 0.19758332, -0.009717658, 0.11556571, -0.008927558, 0.0506363, 0.011549255, 0.022223158, 0.16494507, -0.023656936, -0.012043801) * inp_3_0_0; result0 += M4(-0.3018259, 
-0.16688901, -0.08174471, 0.031225208, -0.3682067, 0.33880973, -0.029750831, 0.052449267, 0.24696672, -0.124532506, -0.040378094, -0.02887898, 0.08495422, -0.10733539, 0.033680625, -0.0040187235) * inp_3_1_0; result0 += M4(0.28583634, 0.06151723, -0.056340285, -0.07114769, 0.40981525, -0.027857127, 0.23838963, -0.121739745, -0.07850837, 0.05227717, -0.03982672, 0.015378227, 0.29068476, -0.04168, 0.045381863, -0.00013727625) * inp_3_2_0; result0 += M4(-0.041488778, 0.10742621, 0.037410427, 0.002719236, 0.23568422, -0.2532835, -0.14904994, -0.02132816, -0.26083633, 0.02395297, 0.013597459, -0.022430388, 0.0039781285, -0.052303746, -0.033815533, -0.08052908) * inp_3_0_1; result0 += M4(0.19898818, -0.062810175, -0.0030155992, -0.014311552, -0.44702345, 0.43076748, 0.13032986, 0.04623066, -0.4599223, 0.39305303, 0.076502815, 0.01207224, -0.2322498, 0.07225618, 0.09567222, 0.074906886) * inp_3_1_1; result0 += M4(0.14233728, 0.04341192, -0.14022432, 0.074965544, -0.42312324, -0.21910575, -0.04191498, -0.16155356, 0.016639965, 0.08997261, -0.030961918, 0.14126752, -0.16792814, 0.10337423, 0.07536254, -0.027254093) * inp_3_2_1; result0 += M4(-0.10881682, 0.0070124045, -0.0734743, 0.02380105, -0.08528868, 0.2052196, 0.28235316, -0.09937192, -0.120403625, -0.18133068, -0.11513659, 0.2849305, -0.093126595, 0.09221593, -0.001545093, 0.118906654) * inp_3_0_2; result0 += M4(0.07010447, -0.002349371, 0.07754369, -0.59997034, 0.057434276, -0.013308527, -0.12963523, 0.44178784, 0.29675305, -0.008670963, -0.14948617, 1.4333277, -0.048508614, 0.077688985, -0.03507625, 0.398176) * inp_3_1_2; result0 += M4(-0.041430607, -0.0056431303, -0.03318371, -0.14079575, 0.08872225, 0.03580239, -0.11937835, -0.055355314, -0.23013176, -0.011193484, -0.0024156924, 0.2336995, -0.08273895, 0.021009317, -0.15586923, 0.20260328) * inp_3_2_2; result1 += M4(0.0513188, -0.06831208, 0.071969114, 0.04630944, -0.10447119, 0.057457622, 0.087569036, 0.111861736, -0.110252105, 0.13196746, 0.07982998, 0.07918412, 
0.07207178, 0.06784348, 0.06167251, -0.0048216507) * inp_3_0_0; result1 += M4(-0.11775122, -0.068306886, -0.1314514, -0.022869237, 0.00013730286, 0.119652584, 0.08880258, 0.17652866, 0.041738562, -0.20556526, -0.09664171, -0.04788704, -0.25071782, -0.040533535, 0.007784405, 0.062077902) * inp_3_1_0; result1 += M4(-0.16064279, -0.11079397, 0.10935189, -0.012442423, -0.2100082, 0.16263524, 0.03739031, -0.10447196, 0.08221832, -0.081931755, -0.015653074, -0.03437766, 0.22324844, -0.04340437, -0.03868832, -0.07421524) * inp_3_2_0; result1 += M4(-0.0006676013, 0.040496968, -0.029079301, -0.085848354, 0.32673135, 0.02512588, 0.13175364, -0.27145657, -0.29334694, 0.13588892, -0.023815535, 0.2919419, 0.19565086, 0.4451977, 0.008018119, 0.44960222) * inp_3_0_1; result1 += M4(-0.022897674, 0.022049528, 0.063053586, 0.12857682, -0.08483499, -0.03945858, 0.070367396, 0.106271245, 0.0042700917, -0.02905016, -0.12511766, -0.19536735, -0.3540937, 0.019095879, -0.015849894, 0.06366033) * inp_3_1_1; result1 += M4(-0.01652925, -0.042448483, 0.008769335, -0.07717558, -0.052162528, -0.322415, 0.11697755, -0.014363827, 0.037944168, -0.029346848, 0.027509725, -0.08859583, 0.50523734, -0.026561983, 0.052579824, 0.0062967185) * inp_3_2_1; result1 += M4(0.2831015, 0.29757833, 0.05384404, 0.07900192, -0.039041176, 0.29436517, -0.04795067, 0.28521034, -0.00084280485, -0.8624592, -0.2333652, 0.28047234, 0.13323455, 0.13980593, -0.1040817, 0.41841346) * inp_3_0_2; result1 += M4(-0.056748446, 0.22499406, 0.47074968, 0.0529144, -0.18061697, -0.102058284, -0.45679823, -0.30751905, 0.56470495, -0.6227711, -1.0454521, -0.232476, -0.31827578, -0.16248211, -0.13230376, -0.018044606) * inp_3_1_2; result1 += M4(-0.27055663, 0.040104035, 0.14919373, 0.020736083, 0.01568136, 0.009609554, -0.04672135, -0.019085662, 0.061935153, -0.2874959, -0.22337474, -0.06617894, 0.24911705, -0.18539925, -0.10120979, -0.028026477) * inp_3_2_2; result2 += M4(0.024042394, 0.073262535, 0.29006875, 0.14790326, 0.27991506, 
0.08672429, 0.63894117, 0.23894568, 0.029150546, -0.003545572, -0.1935614, 0.31723386, -0.026120424, 0.042341888, 0.021989973, 0.39239773) * inp_3_0_0; result2 += M4(0.12542348, -0.12852398, 0.08891283, -0.14603676, -0.13592839, -0.054619603, 0.016557576, -0.5885332, -0.09488814, -0.07046842, 0.068331644, 0.10981448, -0.007638868, -0.13542384, -0.040854737, -0.005702912) * inp_3_1_0; result2 += M4(0.07082901, 0.3306401, 0.14118254, 0.15615349, 0.0026882638, -0.15104243, 0.16891389, -0.18970816, 0.029838756, 0.13553977, 0.0018116818, -0.05020311, 0.08921652, 0.1348939, 0.043923832, -0.12109734) * inp_3_2_0; result2 += M4(-0.034977324, 0.020053409, -0.37609595, -0.054664034, 0.15570554, 0.28191614, -0.6908297, 0.14370124, 0.08973948, -0.14156595, 0.041179802, -0.0043907193, 0.0063222884, 0.08350338, 0.0016131507, -0.056665104) * inp_3_0_1; result2 += M4(-0.10248851, -0.1634687, -0.26851672, -0.46094584, 0.13998966, -0.2924333, 0.07655084, 0.1363661, -0.13416262, 0.08360797, -0.0077458336, 0.052660167, 0.064197175, -0.35289478, -0.004854883, 0.09082584) * inp_3_1_1; result2 += M4(-0.095027156, 0.04068739, 0.03084716, -0.03596515, -0.13508861, 0.04214646, -0.3288225, 0.13073072, 0.1476837, 0.22203143, 0.14448355, -0.24459559, -0.17169268, -0.15294273, -0.10575014, 0.043388218) * inp_3_2_1; result2 += M4(0.029903082, 0.061882522, 0.008157312, 0.36121556, -0.07368661, 0.08575936, 0.35597792, 0.34465766, 0.104644366, -0.11097311, 0.17444994, -0.10731304, 0.1298015, -0.025148133, 0.1390958, 0.045870513) * inp_3_0_2; result2 += M4(-0.368986, -0.10517477, -0.11623787, 0.16291271, 0.27432486, -0.06225037, -0.43164015, -0.07802707, 0.6406274, 0.15081723, 0.038465872, 0.071604036, 0.19591734, -0.15255238, 0.13270366, -0.0070306635) * inp_3_1_2; result2 += M4(-0.43216032, -0.039325733, 0.11141706, 0.12786095, 0.018126328, -0.07048143, 0.16246274, -0.09758973, 0.21826117, -0.11532947, -0.16299699, -0.035572365, 0.13867931, 0.18741262, -0.030538183, -0.04330623) * inp_3_2_2; 
result3 += M4(-0.016080486, 0.032022443, -0.165704, 0.07246584, -0.08014878, -0.35844275, 0.113061614, 0.065997146, 0.02371046, -0.0008155743, 0.07187625, -0.08876956, -0.021193719, -0.033109672, -0.07381202, -0.1332826) * inp_3_0_0; result3 += M4(0.04180735, -0.10776659, -0.027806621, 0.03553281, -0.023503449, 0.0169977, -0.27451426, 0.13919999, -0.042250317, 0.08521528, 0.17336805, 0.094439976, 0.18615168, 0.0021308872, -0.11744622, -0.0066396925) * inp_3_1_0; result3 += M4(-0.15732574, 0.032522585, -0.09444964, 0.01659268, -0.09113259, 0.1305735, -0.11529496, 0.06343632, -0.046901505, -0.05037784, -0.07369559, -0.003344642, 0.07115879, 0.024641449, -0.08119741, -0.0030102434) * inp_3_2_0; result3 += M4(-0.053414203, 0.29699224, 0.22554293, -0.009535028, -0.034720074, 0.2417781, 0.24099764, -0.4626596, 0.13475932, -0.08492695, 0.015628448, -0.20312022, 0.10430628, -0.012566143, 0.055538982, -0.3632165) * inp_3_0_1; result3 += M4(0.15908659, -0.048262477, -0.03419863, 0.1971502, 0.59373474, 0.040420167, -0.006936596, -0.02301966, 0.11121678, 0.12800483, 0.26782277, 0.22499534, 0.13175733, 0.031205691, -0.024721615, -0.60766846) * inp_3_1_1; result3 += M4(-0.120825715, 0.15550387, 0.2964152, 0.045677517, -0.16462475, 0.082349055, 0.025853585, 0.038958352, -0.3481213, 0.024049724, -0.31659842, 0.16717331, 0.1698046, 0.08965835, 0.26032227, 0.0028352945) * inp_3_2_1; result3 += M4(0.048251737, -0.3381504, -0.06695844, -0.157471, 0.00037351192, -0.23412548, -0.046711236, -0.07236172, 0.006832392, 0.35861197, -0.20628686, -0.060990233, 0.082007684, -0.17143841, -0.121094696, -0.1463941) * inp_3_0_2; result3 += M4(0.09094426, -0.522618, 0.32228535, -0.094180234, -0.119871415, 0.044149384, -0.47617498, 0.009163124, 0.12564674, 0.5748051, -0.43634674, 0.10976416, 0.099546134, 0.0657549, 0.03073671, -0.026292121) * inp_3_1_2; result3 += M4(-0.19512002, -0.29417554, 0.16560094, 0.06612767, 0.061374467, -0.08437194, 0.2300457, 0.13011362, -0.19942062, 0.13436554, 
-0.27281252, -0.03332893, -0.028906567, 0.13050336, -0.08008115, 0.07068302) * inp_3_2_2; const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2); imageStore(out_image, output_base + ivec2(0, 0), max(result0, V4(0.0))); imageStore(out_image, output_base + ivec2(1, 0), max(result1, V4(0.0))); imageStore(out_image, output_base + ivec2(0, 1), max(result2, V4(0.0))); imageStore(out_image, output_base + ivec2(1, 1), max(result3, V4(0.0))); } //!DESC ArtCNN C4F16 (Conv2D-2-ReLU) //!COMPUTE 24 32 12 16 //!HOOK LUMA //!BIND conv2d_1 //!SAVE conv2d_2 //!WIDTH LUMA.w 2.0 * //!HEIGHT LUMA.h 2.0 * //!COMPONENTS 4 //!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > * #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable #ifdef GL_EXT_shader_explicit_arithmetic_types_float16 # define V4 f16vec4 # define M4 f16mat4 # define F float16_t #else # define V4 vec4 # define M4 mat4 # define F float #endif const ivec2 ksize = ivec2(3, 3); const ivec2 offset = ksize / 2; const ivec2 wg_size = ivec2(gl_WorkGroupSize); const ivec2 isize = wg_size + ksize - 1; shared V4 inp[4][isize.y][isize.x]; void hook() { const uvec2 local_xy = gl_LocalInvocationID.xy; ivec2 base = ivec2(gl_WorkGroupID) * wg_size; for (uint y = local_xy.y; y < isize.y; y += wg_size.y) { for (uint x = local_xy.x; x < isize.x; x += wg_size.x) { const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2); inp[0][y][x] = V4(conv2d_1_mul * texelFetch(conv2d_1_raw, input_base + ivec2(0, 0), 0)); inp[1][y][x] = V4(conv2d_1_mul * texelFetch(conv2d_1_raw, input_base + ivec2(1, 0), 0)); inp[2][y][x] = V4(conv2d_1_mul * texelFetch(conv2d_1_raw, input_base + ivec2(0, 1), 0)); inp[3][y][x] = V4(conv2d_1_mul * texelFetch(conv2d_1_raw, input_base + ivec2(1, 1), 0)); } } barrier(); V4 result0 = V4(0.007665273, -0.012706782, -0.01845592, 0.026948148); V4 result1 = V4(-0.004221884, 0.016139362, -0.014981546, 0.016426912); V4 result2 = V4(0.023174407, 0.05587216, -0.036808174, -0.002717008); V4 
result3 = V4(-0.036521614, -0.027948106, 0.005269135, 0.016757237); const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0]; const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1]; const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2]; const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0]; const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1]; const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2]; const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0]; const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1]; const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.039672762, 0.14756373, -0.08448369, 0.017731339, 0.025569528, -0.040744573, 0.013367848, -0.019573305, -0.16450569, 0.33285397, -0.11215605, -0.028131716, 0.016628815, 0.023166431, -0.00037328724, -0.08177501) * inp_0_0_0; result0 += M4(-0.065025754, -0.104294226, 0.094719954, 0.038185492, -0.09666806, 0.027351536, -0.038861245, 0.004919656, 0.19828482, -0.22604604, 0.024477452, -0.10591664, -0.19293559, 0.4599996, 0.04217503, 0.16875263) * inp_0_1_0; result0 += M4(0.11909007, 0.022196636, 0.034610387, -0.018519526, -0.06364373, -0.008448898, 0.015312234, -0.0006501346, 0.016660782, 0.11387573, -0.049037285, 0.024658464, 0.034969676, 0.10625149, 0.053557772, 0.020835) * inp_0_2_0; result0 += M4(-0.0125005925, -0.30383512, 0.118058, -0.0622122, -0.053739194, 0.02854181, 0.021767799, -0.023679044, 0.1428111, 0.22441542, 0.13784951, 0.055275932, 0.06970613, -0.03743594, -0.049048167, -0.018928634) * inp_0_0_1; result0 += M4(-0.061673142, 0.35783702, -0.2123013, -0.0029690831, -0.028315723, -0.014267534, -0.029230714, 0.033998776, -0.017710077, -0.12725465, 0.055828135, 0.16664071, -0.6307306, 0.087453276, -0.24898805, 0.04119017) * inp_0_1_1; result0 += M4(-0.046845675, -0.034220554, 0.085932806, 0.083577596, -0.014423473, 0.24495147, 0.029078502, 0.05503181, -0.16131906, -0.10079813, 0.042977642, -0.0016745959, -0.65428495, 0.022565061, 
0.067007095, 0.03766914) * inp_0_2_1; result0 += M4(0.06490114, 0.2199838, -0.22797251, -0.23349749, -0.010567795, 0.07199016, 0.0155491065, 0.04098838, 0.101175986, 0.12190727, -0.072651304, -0.14695418, 0.090389036, -0.28267708, 0.05694964, -0.012954298) * inp_0_0_2; result0 += M4(0.47807366, -0.030264005, 0.49982014, 0.33034417, 0.11898345, 0.19849163, 0.09678157, -0.08528741, -0.2896773, -0.18903524, -0.19259109, -0.20551237, 0.12084159, 0.02699669, 0.014437785, 0.032559108) * inp_0_1_2; result0 += M4(-0.45953196, -0.099686995, -0.48402756, -0.29487053, 0.048139166, 0.025708836, -0.06933556, -0.0635896, 0.27754197, 0.049773578, 0.12305871, 0.106278256, -0.2843809, 0.073834695, -0.0054634786, 0.007431036) * inp_0_2_2; result1 += M4(0.085994475, -0.1193417, 0.0071456973, -0.0188979, 0.04183363, 0.12310363, 0.05413761, 0.04909667, -0.30297938, 0.013713022, 0.033152174, -0.11379657, -0.18750672, -0.22004475, 0.072046556, 0.35643157) * inp_0_0_0; result1 += M4(-0.044690162, -0.058702532, 0.007017294, -0.041176673, 0.048408784, 0.080121264, 0.14161503, 0.12842357, 0.16440009, -0.21827607, 0.108833574, -0.044027567, -0.7746953, -0.8959941, 0.12757428, -0.1544348) * inp_0_1_0; result1 += M4(0.042872313, 0.14205654, -0.03782293, 0.14235368, 0.130305, 0.009863965, -0.036303528, 0.1701927, -0.032293867, 0.22456177, -0.062228113, 0.2358024, -0.16976565, -0.43414745, -0.21161576, 0.099397495) * inp_0_2_0; result1 += M4(-0.17363705, 0.0061433585, -0.13879266, 0.051511798, -0.022545822, 0.10734877, 0.06815579, -0.027122574, 0.04138703, -0.15504237, 0.16342735, -0.072404325, -0.20245026, -0.052299656, -0.02321829, 0.002982435) * inp_0_0_1; result1 += M4(0.02651482, 0.12583537, 0.45761207, -0.095229685, -0.09167909, 0.25368944, 0.1494136, 0.017393095, -0.22577058, -0.07560319, -0.5818347, -0.5617876, -0.20395924, -0.37043095, 0.25695607, 0.45370623) * inp_0_1_1; result1 += M4(0.15493771, -0.1451798, -0.32017693, 0.106086396, 0.14634423, 0.22603515, 0.07768741, -0.010450286, 
0.002767987, 0.46913233, 0.23929594, 0.44290265, -0.05627698, -0.33913442, 0.029245248, -0.02599956) * inp_0_2_1; result1 += M4(-0.0021248949, 0.3092833, 0.15976787, 0.17359373, 0.12920043, 0.106215686, 0.111749046, 0.032766804, 0.0702469, -0.12661065, -0.1285257, -0.08318957, -0.06495591, 0.021884361, -0.043050367, -0.048703246) * inp_0_0_2; result1 += M4(-0.20655625, -0.51618236, -1.0094728, -1.0106137, 0.18894927, 0.06065753, 0.12400759, -0.07092802, 0.30361733, -0.16834623, -0.015209634, -0.0769189, -0.21808125, -0.07767393, 0.054584887, -0.3680687) * inp_0_1_2; result1 += M4(-0.33314708, -0.15586081, 0.6099683, 0.13461177, 0.057432342, 0.22775277, 0.21322937, 0.0532397, 0.21244651, -0.0720669, 0.020412793, 0.037264194, 0.06017442, -0.2044945, 0.040000513, 0.19136631) * inp_0_2_2; result2 += M4(-0.022256808, 0.04888811, -0.022015247, -0.092115186, 0.020818705, -0.028380645, 0.06185778, 0.08197749, -0.15860432, 0.108361594, -0.19694375, 0.07381369, 0.15101261, 0.14137074, -0.088994004, -0.5152534) * inp_0_0_0; result2 += M4(0.06987822, -0.082000196, 0.02866381, 0.02108778, -0.051829483, -0.10683904, 0.12230972, 0.06575793, 0.090399876, -0.09562653, 0.043466404, 0.045909394, -0.020149088, -0.023214703, -0.30334076, -0.152464) * inp_0_1_0; result2 += M4(0.0811315, -0.031609118, 0.01223644, 0.037147917, -0.18350898, -0.07458222, 0.089401655, 0.04162907, 0.103615806, -0.10315151, -0.07726605, 0.19025756, 0.1799963, -0.01810218, -0.14209187, -0.16685213) * inp_0_2_0; result2 += M4(0.36047256, 0.1324033, -0.035303097, -0.029761754, -0.08453878, -0.092538394, 0.048813272, 0.2952445, -0.16890605, -0.24446781, -0.2384415, -0.051971577, 0.17304648, 0.015542306, -0.4414377, -0.20117971) * inp_0_0_1; result2 += M4(-0.045148943, -0.3111919, 0.11992417, -0.027939236, -0.18373263, -0.015571242, 0.16786881, 0.19525638, 0.017805958, -0.23059563, -0.410443, -0.16854092, 0.1441553, -0.45532706, -3.1361716, -0.3325092) * inp_0_1_1; result2 += M4(-0.38618526, 0.13740632, 
-0.08181973, 0.03373343, -0.14354973, -0.21948239, 0.18439542, 0.15648417, -0.085576214, 0.26852188, -0.26260558, 0.039526124, 0.25491875, 0.045001876, 0.11680835, -0.30998042) * inp_0_2_1; result2 += M4(-0.22676072, -0.19009703, 0.22159863, -0.30039275, -0.02362561, -0.11382714, 0.12698817, 0.44949046, 0.26380983, 0.025303096, -0.06650232, 0.036743466, -0.23880884, 0.034025613, 0.016137755, -0.020864155) * inp_0_0_2; result2 += M4(0.35370076, 1.4717959, -0.39453936, 0.012876503, -0.22790988, 0.013500226, -0.023526462, 0.40373355, -0.12799382, -0.35260844, 1.2150608, -0.2795026, 0.09862312, -0.31711802, -0.38266385, -0.4625872) * inp_0_1_2; result2 += M4(-0.006961403, -0.38303775, -0.023807812, -0.032239847, 0.1080835, -0.20735481, 0.20969796, 0.23094925, -0.05239786, 0.12599377, 0.20246367, 0.14485277, 0.13914481, 0.018579604, -0.0628238, -0.22654083) * inp_0_2_2; result3 += M4(-0.008393504, 0.06399756, -0.0042330017, -0.038735587, 0.009515421, -0.009107273, 0.033609197, 0.017786147, -0.056026146, -0.015695263, -0.101369716, -0.047951315, -0.0045006033, -0.024499964, -0.059617385, -0.050731726) * inp_0_0_0; result3 += M4(0.05816133, -0.016608475, -0.008917803, 0.0039858357, 0.04840855, -0.03989783, 0.021003753, 0.11802983, 0.04186096, -0.02740838, 0.03711145, -0.15018326, 0.06298314, -0.13889475, 0.0056237825, -0.1520681) * inp_0_1_0; result3 += M4(-0.0464783, 0.0043105236, 0.05190762, -0.016212137, 0.08643694, 0.044873476, 0.01192245, 0.073939085, -0.050208796, -0.06378546, 0.040115822, -0.10028227, 0.17046078, 0.104430355, 0.04660742, 0.004472599) * inp_0_2_0; result3 += M4(0.01869681, 0.17765687, -0.01132854, -0.028719053, 0.03712774, -0.03786645, 0.021126691, 0.03835777, 0.07792008, 0.21017025, -0.017897833, -0.09897833, -0.08394025, 0.14385825, 0.021313882, -0.0965509) * inp_0_0_1; result3 += M4(0.26953533, -0.046034764, -0.04922047, 0.2146039, -0.06167827, 0.10177388, -0.00028696132, 0.24834304, 0.21333528, 0.09260683, -0.062288165, 0.011650751, -0.35006827, 
0.34293538, 0.45763996, -0.2745375) * inp_0_1_1; result3 += M4(-0.248477, 0.13341399, 0.046218965, -0.23830393, 0.046279684, -0.04249783, 0.05034412, 0.22284749, 0.19501184, -0.013352284, -0.08176439, 0.062934145, -0.38506672, 0.22943279, 0.09323349, 0.055179156) * inp_0_2_1; result3 += M4(-0.16656476, -0.9840606, -0.20659976, -0.763574, -0.043892097, -0.44524747, -0.019569376, 0.08370716, -0.00852045, -3.123414, -0.08242622, 0.15859091, 0.057629827, 0.21355532, 0.048139296, 0.13085455) * inp_0_0_2; result3 += M4(0.16033624, -0.8272528, 0.20588383, 0.8193101, 0.07821069, -0.8910632, 0.08296487, 0.28125536, -0.024175977, -3.1040792, 0.06526852, -0.1698219, -0.008097565, 0.17878205, -0.021604018, -0.03205016) * inp_0_1_2; result3 += M4(0.30415177, -0.04037523, -0.30204728, 0.1479439, 0.08117435, -0.59381247, -0.028565824, 0.3110151, 0.07412945, -2.6523457, 0.13584737, -0.14663945, -0.13277556, 0.3445814, -0.08542846, 0.10787985) * inp_0_2_2; const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0]; const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1]; const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2]; const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0]; const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1]; const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2]; const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0]; const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1]; const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.014861815, 0.066454895, -0.018906003, -0.023595516, -0.016467934, 0.08245411, 0.058886092, -0.00517167, 0.0736858, -0.33098358, -0.053136736, 0.042932037, -0.023391662, -0.0154059855, 0.0027027188, 0.04671472) * inp_1_0_0; result0 += M4(0.12285722, 0.02999312, -0.024734395, 0.052841812, -0.07606859, 0.532711, 0.029948376, 0.07419315, -0.1194898, -0.7954723, -0.117610976, -0.0037800272, 0.046508733, 0.15742284, -0.030505521, -0.06849125) * inp_1_1_0; result0 += M4(0.10988922, 
-0.021709241, 0.0027044513, -0.087975435, -0.037619695, 0.1729772, 0.043288194, -0.00652201, 0.215917, -0.13082863, 0.0029795505, 0.026782269, 0.18413892, 0.009974911, 0.07329704, 0.057529766) * inp_1_2_0; result0 += M4(-0.068852276, -0.015672447, 0.08639786, 0.015099182, -0.14214975, 0.03751679, 0.01517611, 0.017874567, 0.010275957, -0.33464134, 0.023621306, 0.06633157, 0.18977083, 0.014755853, -0.021573516, -0.025924493) * inp_1_0_1; result0 += M4(0.18194625, 0.21327859, -0.022046618, -0.0863755, -0.10154144, 0.11552613, -0.02150014, 0.08803059, 0.33497876, 0.11428454, 0.7498792, 0.24571484, 0.11873142, -0.40479037, 0.12675676, 0.076739) * inp_1_1_1; result0 += M4(-0.38466164, -0.03697603, -0.037561305, -0.043681134, -0.6816189, 0.13083659, 0.07451801, -0.003327551, 0.5126916, -0.09917474, -0.0708578, 0.01648595, 0.423934, -0.34497714, -0.02784395, 0.010498842) * inp_1_2_1; result0 += M4(-0.08783385, 0.033599894, 0.006346594, -0.006446634, -0.016138535, 0.045155555, -0.04009477, -0.018183654, -0.013776878, -0.014323365, 0.05841988, 0.076961026, -0.047537193, -0.0663202, 0.03427056, 0.03428585) * inp_1_0_2; result0 += M4(-0.09914099, -0.032711275, 0.097858556, 0.117271565, -0.056332853, 0.09818337, -0.05306024, -0.04881118, 0.23671569, 0.02282729, 0.025347842, 0.03629352, -0.075708866, -0.023583038, 0.03354702, 0.007265923) * inp_1_1_2; result0 += M4(0.36997548, 0.052172437, -0.023425005, 0.042993546, -0.08452691, 0.12523419, -0.044003073, -0.0430429, 0.016587403, -0.14865886, -0.03721655, -0.031733558, 0.021108441, -0.041968603, 0.0120479325, 0.0014579248) * inp_1_2_2; result1 += M4(0.32410297, 0.16156995, -0.2704675, -0.15474492, -0.0051417057, 0.0736679, -0.027583983, 0.042698298, 0.029605344, -0.097892836, -0.042276148, -0.17086019, 0.05914977, -0.0958991, 0.024540406, -0.014890719) * inp_1_0_0; result1 += M4(-0.17057064, -0.06878893, 0.09484002, -0.46547592, 0.16778767, 0.21305582, 0.055226404, 0.14098307, -0.24932669, -0.6438276, 0.1469775, 0.15965456, 
0.13635448, -0.10718979, 0.08083247, -0.47670645) * inp_1_1_0; result1 += M4(0.033564653, 0.10775614, -0.082825914, -0.10517869, 0.19951133, 1.1163155, -0.15247913, 0.20495285, 0.057683297, -0.844752, 0.16111496, -0.44034448, -0.20139556, -0.9145029, -0.13836655, 0.1408168) * inp_1_2_0; result1 += M4(0.103660226, 0.21737917, -0.16172896, 0.13338298, -0.018703043, -0.07260912, -0.040737107, 0.034949742, -0.19936053, 0.051139116, -0.020942764, 0.009280968, 0.027868485, 0.002934873, -0.07566193, -0.104706034) * inp_1_0_1; result1 += M4(0.35724416, -0.077911, 0.09947001, -0.6117375, 0.21342002, 0.060938604, -0.20370094, 0.3130246, -0.47341087, -0.30221367, 0.4240177, -0.0758348, -0.12976281, 0.024794862, -0.07694939, 0.101710096) * inp_1_1_1; result1 += M4(-0.14973663, -0.2178218, -0.022833738, 0.5495507, -0.014890054, 0.36719897, 0.0068603246, 0.8096376, 0.054707617, -0.11853919, -0.20341462, -0.7495799, -0.03551902, -0.3992545, -0.40176797, -0.058236588) * inp_1_2_1; result1 += M4(0.1114995, 0.018287987, -0.1068866, 0.30490544, 0.026842179, 0.09772784, 0.021783939, 0.06514757, -0.02939784, 0.0063857757, 0.052597906, -0.029246058, 0.03607791, -0.026882924, 0.0045780633, -0.015172184) * inp_1_0_2; result1 += M4(-0.040682446, -0.045952864, 0.14679994, 0.3277869, -0.010540915, -0.075695984, -0.047419466, 0.34779298, 0.02479446, -0.020837042, -0.0089015765, -0.34170833, 0.08684945, -0.019799326, 0.009023451, -0.073233105) * inp_1_1_2; result1 += M4(-0.05574628, 0.18257698, 0.121048525, -0.3028285, 0.021456659, -0.1035951, -0.032101694, 0.4428228, -0.14607942, 0.017202685, -0.027647339, -0.24124599, 0.05897191, -0.095147684, -0.052099764, -0.19641311) * inp_1_2_2; result2 += M4(-0.0677571, -0.14728205, 0.2918349, 0.17206684, 0.09093841, 0.017973123, -0.0043883957, -0.15070698, 0.13623445, -0.03267367, -0.035259522, 0.2057031, 0.10146496, -0.043358415, -0.059748214, 0.09960731) * inp_1_0_0; result2 += M4(-0.084727354, 0.11521498, 0.00149433, 0.027532067, 0.048819114, 
-0.105092816, -0.009181161, -0.13722752, -0.44462013, -0.22134246, 0.10355644, 0.08042798, -0.004623407, -0.16590333, -0.11398178, 0.008029647) * inp_1_1_0; result2 += M4(-0.0021335448, -0.15116176, 0.20302649, 0.055204622, 0.07024495, 0.056478035, 0.09759212, 0.033892937, 0.0037432178, 0.07928205, -0.04295959, 0.11223701, -0.22060403, -0.042706113, -0.28266382, -0.025342258) * inp_1_2_0; result2 += M4(-0.18853788, -0.37525776, 0.33662406, 0.0021643888, 0.046762835, 0.043696124, -0.0014363487, -0.28293067, -0.16980772, 0.048948396, 0.3054481, -0.25124067, -0.12796396, -0.13256674, 0.061534125, 0.13214476) * inp_1_0_1; result2 += M4(0.15168403, -0.30957776, -0.2828841, 0.25446436, 0.55487597, -0.07893502, -0.5719077, -0.7259194, -0.109957784, 0.36705047, 0.23906712, 0.019667521, -0.15451595, 0.08622951, 0.4389705, 0.56009406) * inp_1_1_1; result2 += M4(-0.062005397, 0.08156719, -0.16399525, -0.06189081, 0.06525984, 0.09745813, 0.39185095, -0.29123664, -0.15941338, 0.08956102, -0.21950622, 0.11557442, 0.38353565, 0.1466502, -4.3374176, 0.34140408) * inp_1_2_1; result2 += M4(0.092709705, -0.15493768, -0.012704912, -0.117980175, 0.07436645, -7.8802695e-05, -0.058275957, -0.18827915, -0.0631416, -0.0373527, -0.11127798, -0.009910854, -0.011067589, -0.0053280033, 0.022658903, 0.06333271) * inp_1_0_2; result2 += M4(0.0916497, 0.1943221, 0.37540963, -0.046307754, 0.00953411, 0.066546984, 0.2116024, -0.5158029, -0.032494158, 0.14182144, -1.8395805, 0.3173581, -0.09340417, -0.10758007, -0.15254256, 0.32740375) * inp_1_1_2; result2 += M4(0.025273405, 0.08492187, -0.14413005, -0.05747447, 0.089801, -0.03564784, -2.2446504, -0.4472199, -0.12965105, 0.009225538, 0.10549652, 0.11036174, -0.115849234, -0.099777445, 0.12770584, 0.31634042) * inp_1_2_2; result3 += M4(0.021053081, -0.055306282, -0.081807986, -0.21331595, -0.000998926, 0.008907352, 0.016980026, 0.009302538, 0.04861071, -0.0046227667, -0.0016314664, -0.07899752, -0.078419186, -0.11081667, -0.030900553, 0.02087226) * 
inp_1_0_0; result3 += M4(0.1846625, 0.099488035, -0.05193646, -0.095876545, -0.0011387762, 0.015150568, -0.010261407, -0.013941359, -0.042803578, 0.059049208, -0.07620146, -0.102711946, -0.034578953, 0.20519276, 0.097422205, 0.025764102) * inp_1_1_0; result3 += M4(-0.43959314, 0.16758272, 0.035562072, -0.07742551, -0.38682875, 0.046922028, 0.08865273, -0.044875767, -0.80026156, -0.06039983, 0.024169726, -0.13747011, -0.55590475, -0.23130962, -0.04149803, -0.08241675) * inp_1_2_0; result3 += M4(0.030927973, -0.6119503, 0.10466902, -0.12390426, 0.034730162, -0.10333266, 0.02035231, 0.013106206, -0.05916547, -0.777083, -0.006414744, -0.011363021, 0.035848703, 0.36386737, -0.002772792, -0.0480502) * inp_1_0_1; result3 += M4(0.5065, 0.19396274, 0.14151096, -0.35221586, 0.09600475, -0.014168858, -0.034725133, 0.05604017, -0.13955177, -1.0448937, -0.19251399, 0.007042205, 0.14856394, 0.38436663, -0.0064019714, -0.014204183) * inp_1_1_1; result3 += M4(-0.061922383, -0.12842242, -0.07295167, -0.0075706756, 0.10817202, -0.1655033, 0.07266308, -0.022521412, -0.093321756, -0.876699, -0.018780176, -0.14927478, -0.03584319, -0.5748604, -0.024849145, -0.17630738) * inp_1_2_1; result3 += M4(-0.025384348, 0.49858052, -0.075553015, 0.025753077, 0.002560775, -0.37545115, -0.028084433, 0.030594576, 0.0049141925, 0.15890612, 0.08069018, -0.08922643, 0.036730718, -0.2823979, 0.031380307, 0.031606745) * inp_1_0_2; result3 += M4(0.23545441, -1.2197607, -0.025337541, 0.21307686, -0.09559411, -0.8597017, 0.034877297, -0.015559628, 0.058392383, 0.3864891, -0.053282253, -0.055226818, -0.007128761, -1.8220773, 0.02058869, 0.017222092) * inp_1_1_2; result3 += M4(0.080354765, -0.09153391, 0.029530875, 0.03370949, -0.07208025, 0.2936868, -0.05797227, 0.013, 0.052441202, -1.7494535, 0.052883293, 0.008096113, -0.028412474, -2.4877417, 0.005800512, 0.025508739) * inp_1_2_2; const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0]; const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1]; const 
V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2]; const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0]; const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1]; const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2]; const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0]; const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1]; const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.03952951, 0.038941756, 0.09056351, 0.097450286, -0.1596078, -0.1410198, 0.168562, -0.026641607, -0.054110724, -0.0563214, 0.034391247, 0.021438785, 0.0057072574, -0.107405394, -0.04044469, 0.044145353) * inp_2_0_0; result0 += M4(0.11733438, -0.20137449, -0.040039547, 0.02093038, -0.2984962, 0.2671289, -0.19594978, -0.06000205, -0.041333724, -0.114821054, 0.00012655034, -0.018803762, 0.02485363, -0.010562574, 0.06306494, -0.0020644043) * inp_2_1_0; result0 += M4(-0.036415216, 0.10247068, -0.030816095, -0.040820822, -0.04745717, 0.020836288, 0.06765466, 0.007323091, -0.06571431, 0.027249105, -0.051204715, -0.019635567, 0.05453804, -0.12017959, -0.035392873, 0.028308189) * inp_2_2_0; result0 += M4(-0.04156831, 0.28028694, 0.06831695, 0.022350814, 0.13675615, 0.15137945, -0.0084258355, 0.53870726, -0.07335624, 0.00800944, 0.012506781, 0.057974383, 0.013222071, -0.14250757, -0.007383215, 0.031982966) * inp_2_0_1; result0 += M4(0.65670925, -0.30265868, 0.2362948, 0.12972662, 0.20985337, -0.45745087, 0.22702388, 0.083776616, -0.06499208, 0.03460006, -0.041461278, -0.025413781, 0.019352706, -0.45709613, 0.014837353, 0.027197322) * inp_2_1_1; result0 += M4(0.57660085, -0.09744273, -0.009441254, -0.03757566, 0.019752283, -0.067605585, -0.009568382, 0.07353522, -0.064801805, 0.13704818, -0.034330864, 0.0051319897, 0.20990741, -0.33425754, -0.016691428, -0.028612768) * inp_2_2_1; result0 += M4(-0.10979933, 0.18432014, -0.074066855, 0.0054639196, 0.098094076, 0.064506084, 0.26912084, 0.16957463, 0.07098004, 0.023026804, 0.071376055, 0.034128394, 
0.059867695, -0.08107985, 0.10168895, 0.08820417) * inp_2_0_2; result0 += M4(-0.2552831, -0.052631672, 0.050921284, -0.016037157, -0.116781406, -0.07262549, -0.0022834113, -0.088421635, 0.15951507, 0.00042410966, 0.07465069, 0.0019270786, 0.10275644, -0.3381453, 0.24874882, 0.10901488) * inp_2_1_2; result0 += M4(0.36554116, -0.027189862, 0.0032612395, 0.004169092, -0.042129308, 0.044692904, 0.040775307, 0.010006452, -0.021286374, 0.073701605, -0.0044975975, 0.0043594157, 0.54600775, -0.14487681, 0.062218707, 0.01976116) * inp_2_2_2; result1 += M4(0.014763889, 0.11339801, 0.032690678, -0.08592508, 0.103032224, -0.09282646, -0.098232545, 0.3981169, -0.03748985, 0.06707042, 0.019140782, 0.06538452, 0.0027186018, -0.0030862922, 0.029157702, -0.006268967) * inp_2_0_0; result1 += M4(-0.16860075, -0.18705767, 0.29325658, 0.07206259, -0.2872574, 0.5612131, 0.04638324, 0.49310178, -0.19112644, 0.17206986, 0.0029015448, -0.059934225, 0.028581057, -0.13357182, -0.038109895, 0.0005232052) * inp_2_1_0; result1 += M4(0.15277332, 0.007269086, 9.9966186e-05, 0.23408552, 0.14690994, -0.058067933, 0.008219528, 0.19074975, -0.1848797, 0.024352368, 0.05464677, 0.06272763, 0.008818596, 0.08902247, 0.035453882, -0.11298582) * inp_2_2_0; result1 += M4(0.15972948, -0.15435955, 0.053044003, -0.048703156, 0.07449017, 0.52491075, 0.29206496, -0.09729822, -0.14646658, 0.11030144, 0.08527744, 0.10426317, -0.13405898, 0.060408086, 0.06603381, -0.0022193235) * inp_2_0_1; result1 += M4(0.26860082, 0.7463653, -0.14953612, -0.47049257, -0.031829864, -0.8885134, 0.14411287, -1.1775255, 0.014824517, 0.16122791, 0.1182362, 0.1877675, -0.027240504, 0.21426389, -0.030941296, -0.14024088) * inp_2_1_1; result1 += M4(-0.05250066, 0.18955119, -0.00546335, 0.2086277, 0.026105952, 0.09914598, -0.120411806, -0.24279192, -0.15098634, 0.12038881, 0.24426264, 0.08256465, -0.1532883, 0.08571956, -0.05187043, -0.50765544) * inp_2_2_1; result1 += M4(0.1040866, -0.05975824, 0.023336973, -0.032634534, -0.05037019, 
0.14335546, 0.12934166, 0.6264734, -0.03757042, 0.113765046, 0.07949648, 0.14570814, -0.041041322, 0.060453493, 0.08972462, 0.048948) * inp_2_0_2; result1 += M4(-0.00400568, 0.06874304, -0.06918383, 0.535076, 0.20119326, 0.4890317, 0.08677889, 0.06478608, -0.071693674, 0.14588821, 0.000260698, 0.06933203, 0.012584373, 0.1483624, 0.17692986, -0.21019499) * inp_2_1_2; result1 += M4(-0.0102418875, 0.10566752, -0.061612416, -0.28907233, 0.0599261, -0.07173808, 0.046218887, -0.09600095, -0.1028075, 0.20061518, 0.07601999, 0.15499063, -0.029355908, 0.16779986, -0.15463807, -0.5739547) * inp_2_2_2; result2 += M4(0.19928946, -0.033282038, 0.057147387, 0.115399025, 0.020315133, 0.02849115, 0.09572222, -0.044312302, 0.006143764, 0.12822536, 0.097994566, -0.124979936, -0.0746315, 0.026666824, 0.004227371, 0.08828524) * inp_2_0_0; result2 += M4(0.19336012, 0.37895447, 0.15984185, 0.09104523, 0.124388486, 0.048203543, 0.2084745, 0.11253485, -0.07148966, 0.14339097, 0.08032688, -0.17872952, -0.049673792, -0.008513587, 0.025794912, 0.03211732) * inp_2_1_0; result2 += M4(-0.26330376, 0.19026726, 0.1857569, 0.1315198, 0.064501986, 0.10317745, -0.0176552, 0.02821371, -0.18644843, 0.1889562, 0.10001778, -0.104555264, 0.064582534, 0.06963303, 0.0025069967, -0.0014127698) * inp_2_2_0; result2 += M4(0.066179745, 0.013192936, 0.030699657, 0.044947155, -0.29488063, -0.17109951, 0.1126733, 0.14376812, -0.078351244, 0.14674957, 0.07026974, -0.16070388, -0.2022285, 0.0499185, 0.03827287, 0.116912186) * inp_2_0_1; result2 += M4(-0.073056035, 0.3671889, 0.25542954, -0.17342108, 0.025615497, -0.12021865, 0.13177577, -0.2007023, -0.13627052, 0.2357552, 0.13015415, -0.35278785, -0.43087515, 0.169868, 0.06493807, 0.04443809) * inp_2_1_1; result2 += M4(-0.045416348, -0.025851315, 0.007347807, -0.004663971, -0.062454782, -0.038503435, -0.10300142, -0.050885145, -0.22055164, 0.079387955, 0.27758706, -0.20710965, -0.20509078, 0.24001168, 0.04358121, -0.028955676) * inp_2_2_1; result2 += M4(0.26075888, 
0.013541241, 0.06371779, -0.051259648, -0.110951304, 0.039574105, 0.56873083, 0.084856056, -0.11592972, 0.068496436, 0.12595423, -0.09547746, -0.13057609, 0.07384179, 0.014910358, 0.07805904) * inp_2_0_2; result2 += M4(-0.12786224, 0.26142845, 0.24205787, -0.18063907, -0.320074, 0.08904255, -0.41527358, -0.21437109, -0.0011689005, 0.1619207, 0.12261965, -0.45217648, -0.52569383, 0.107322425, 0.19049393, 0.08988259) * inp_2_1_2; result2 += M4(-0.09822684, -0.045753952, -0.009845761, 0.14947642, 0.06210613, -0.06658182, 0.2632897, 0.08839577, 0.09217471, 0.11952383, -0.01280119, -0.3180698, -0.4636886, 0.10647941, -0.05618376, 0.095478706) * inp_2_2_2; result3 += M4(-0.07195442, -0.026333254, 0.12311325, 0.12909564, 0.084573925, -0.054440748, 0.053060867, 0.17208287, 0.05446062, -0.03114994, 0.044506367, -0.026369767, 0.01629298, -0.007789797, -0.018678132, -0.06553193) * inp_2_0_0; result3 += M4(-0.24843477, -0.042103805, -0.018636895, 0.09162976, 0.08486336, 0.08406515, -0.10784883, 0.21382321, 0.07368658, 0.006697408, 0.028386468, -0.09322328, -0.012220106, 0.044635873, 0.045417525, -0.0190928) * inp_2_1_0; result3 += M4(-0.026487457, -0.019765217, -0.055956613, -0.12057359, 0.061001785, -0.011873475, 0.019730749, -0.020624056, 0.0407704, 0.03941482, -0.050869886, -0.07624975, -0.02593104, 0.003896486, -0.019085458, -0.025894178) * inp_2_2_0; result3 += M4(0.11962663, 0.27838814, 0.061310023, 0.17531951, -0.07656211, 0.065626204, 0.14210185, 0.12210379, -0.019672073, 0.062481478, 0.0018487858, 0.011222493, -0.029829169, 0.022130238, -0.0002840079, -0.04773493) * inp_2_0_1; result3 += M4(0.49700227, 0.27449438, 0.33256647, 0.13850217, 0.24635719, 0.20387016, 0.17043021, -0.0163939, -0.1460736, -0.053372465, -0.038251426, -0.13872208, 0.067372546, -0.07736337, -0.009627407, -0.0026862074) * inp_2_1_1; result3 += M4(0.1817412, 0.22691032, -0.043124903, -0.035016127, 0.054409128, 0.05345417, -0.042170398, 0.059411958, 0.06607303, -0.062856466, 0.096283935, 
0.045097217, 0.025960647, 0.08227504, -0.0507895, 0.043821495) * inp_2_2_1; result3 += M4(-0.031986434, 0.012987435, -0.08489054, -0.026731597, -0.0078079957, -0.13354276, 0.060593486, 0.060747415, -0.010851016, 0.4664801, 0.06491011, -0.023194442, -0.04618695, -0.007261282, 0.070509985, -0.0035181446) * inp_2_0_2; result3 += M4(-0.018880002, -0.550916, 0.07930929, 0.0028127227, 0.11175226, -0.73254424, 0.013858653, 0.111013226, 0.008463192, 0.122215405, 0.025371715, -0.14817141, -0.009153332, -0.5058772, 0.2843366, -0.09837672) * inp_2_1_2; result3 += M4(0.06638847, -0.379183, 0.07764303, -0.08592844, 0.056915276, -0.20073003, 0.05886113, 0.012288142, 0.07630634, 0.86829257, -0.07294268, -0.17273578, -0.019856475, -0.8567808, 0.064931154, -0.130162) * inp_2_2_2; const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0]; const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1]; const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2]; const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0]; const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1]; const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2]; const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0]; const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1]; const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.07428233, -0.11267418, 0.037147317, 0.012798383, -0.019355856, 0.2063775, -0.04869492, 0.023931865, 0.01826915, 0.061503474, 0.0725478, -0.016411101, 0.101941735, -0.07801213, 0.04161863, 0.0091594625) * inp_3_0_0; result0 += M4(-0.010983235, -0.16873623, 0.09934868, 0.1475657, 0.3739511, -0.26998657, -0.0023212505, -0.13427515, -0.12743473, 0.32550758, -0.00587959, 0.020362807, 0.14574823, -0.055605818, -0.089228176, 0.07283733) * inp_3_1_0; result0 += M4(-0.029632267, -0.006311438, 0.03683852, 0.0460545, -0.30349982, -0.22447036, -0.104302175, 0.006356504, -0.13840042, 0.165813, -0.03996628, -0.0009846408, 0.17081043, -0.005697063, 0.0636153, 
-0.015204903) * inp_3_2_0; result0 += M4(-0.0288406, -0.33211818, 0.21823318, -0.4359618, 0.13476162, 0.2285806, -0.03546847, -0.036070306, -0.25634402, 0.13865985, -0.043487567, 0.011144803, 0.005096203, -0.04584228, -0.0008283112, -0.05267128) * inp_3_0_1; result0 += M4(-0.10899708, 0.3470446, 0.0151194865, 0.025779225, 0.016572125, -0.08544362, 0.11226876, 0.027983677, -0.22979917, -0.041324202, 0.07830652, 0.1347805, -0.32296747, -0.0066952063, 0.06512874, 0.55718046) * inp_3_1_1; result0 += M4(0.13161051, -0.019632412, -0.007245154, -0.046875164, -0.0063375905, 0.12709594, 0.048449367, 0.051778726, -0.24015811, 0.21853109, 0.052569073, 0.061736215, -0.197316, 0.1515158, -0.027146904, -0.044522174) * inp_3_2_1; result0 += M4(0.048434425, 0.22990787, -0.01401221, -0.09624674, 0.011959234, 0.24457654, -0.06643086, -0.07578145, -0.13297154, -0.036548138, 0.049699903, 0.06231048, -0.110873826, -0.076110214, 0.002417552, 0.06022659) * inp_3_0_2; result0 += M4(-0.41839388, 0.22337331, 0.039156444, 0.0853396, 0.021588642, 0.0067923763, -0.12538072, -0.0549457, -0.1382161, -0.018326575, 0.041040212, 0.0237146, 0.05310075, 0.12090509, 0.08417775, 0.0426096) * inp_3_1_2; result0 += M4(0.01903611, -0.03635987, -0.02728464, -0.0124920765, 0.012841934, -0.0045961696, 0.029713307, -0.040158134, -0.16160838, 0.07344862, 0.058719117, 0.05547189, 0.22943653, -0.039776597, 0.019647159, 0.049335755) * inp_3_2_2; result1 += M4(-0.050886225, 0.010773032, 0.068307996, 0.05066938, -0.10175775, 0.11504418, 0.057479113, -0.07015231, -0.011961269, 0.05447044, -0.024111401, 0.03896853, 0.011676859, -0.00091025315, 0.0037686634, 0.01574372) * inp_3_0_0; result1 += M4(0.0011024862, -0.6409941, -0.04154058, -0.29942003, -0.04070786, 0.6291146, -0.31498915, -0.22144635, -0.10045137, -0.06473676, 0.0025352717, 0.03836787, 0.11647241, -0.06366675, -0.12816723, -0.26756752) * inp_3_1_0; result1 += M4(0.059803132, 0.04112017, -3.865815e-05, -0.21099187, -0.0766396, 0.49812844, 0.23124191, 
0.19388396, -0.10059807, 0.13539618, 0.012365532, 0.26512218, 0.27849954, -0.4466902, -0.14386821, 0.013069269) * inp_3_2_0; result1 += M4(-0.0011572625, -0.08685813, 0.03506645, 0.32730153, 0.05965957, -0.030346887, -0.033162106, 0.10434774, 0.087460466, -0.04626651, -0.016391698, -0.055453468, 0.09741922, 0.04287869, -0.030375898, -0.033166446) * inp_3_0_1; result1 += M4(0.20846428, -0.74136734, 0.5844527, 0.08609542, -0.015801225, 0.030329805, 0.13481435, -0.0682619, 0.12169573, 0.23842777, 0.006281105, -0.050572608, -0.20900005, -0.37843063, 0.59546393, 0.07246317) * inp_3_1_1; result1 += M4(0.07766126, -0.089687124, -0.02369101, -0.39898276, 0.08709351, 0.15610176, -0.14815669, 0.13425788, 0.032636512, 0.011805772, 0.1855222, 0.49408266, 0.2102859, -1.2030262, -0.12703134, -1.5024164) * inp_3_2_1; result1 += M4(0.08445707, -0.13061571, -0.09417561, -0.48509976, 0.07966434, 0.023925027, 0.032142166, 0.044588283, 0.011262317, -0.011922964, 0.09275115, -0.019332774, -0.08464665, -0.036373246, 0.057657905, 0.008083529) * inp_3_0_2; result1 += M4(-0.013658516, 0.17535844, 0.43256798, -0.3588802, 0.21435346, 0.15809372, 0.05006835, -0.043844294, 0.04974172, 0.21411541, 0.03997919, 0.14509, 0.05745982, -0.02040189, 0.12265856, -0.26175705) * inp_3_1_2; result1 += M4(-0.012978171, -0.06615713, -0.016302722, -0.07360273, 0.1120467, 0.2523338, 0.013040222, 0.07845912, 0.12137823, 0.13015787, 0.036681052, 0.09809164, 0.014065165, -0.122808255, -0.039797485, -0.46501014) * inp_3_2_2; result2 += M4(-0.080871835, -0.22307916, 0.07004091, 0.06692253, -0.25825113, -0.030632246, 0.07111695, 0.44181773, 0.09049517, -0.008341272, 0.044914108, -0.18295835, -0.014715955, -0.008019641, -0.03798547, -0.074919805) * inp_3_0_0; result2 += M4(-0.060306337, -0.19063218, -0.20212461, -0.011923865, 0.2857088, 0.009115366, 0.11899254, 0.14336674, 0.18231158, -0.13981315, 0.045094874, -0.09755735, 0.040069308, 0.016306436, -0.21158268, -0.06527811) * inp_3_1_0; result2 += M4(-0.00039039235, 
-0.03416186, 0.018411878, -0.010035256, 0.0074744555, -0.30681312, 0.04060132, 0.08147552, 0.033693682, -0.037743136, 0.009858464, -0.057877015, -0.015383695, -0.07576271, -0.03201459, -0.07223331) * inp_3_2_0; result2 += M4(0.15047242, -0.10756204, 0.09433242, 0.14607352, 0.028909385, 0.07892575, 0.007390004, 0.14807044, 0.056910306, -0.037883703, 0.050295055, -0.35840037, -0.019727677, -0.0066186097, -0.0021540523, 0.017334) * inp_3_0_1; result2 += M4(0.14233728, 0.24207225, -0.05877726, 0.11620321, 0.10720529, 0.0008283299, -1.5752418, 0.08940047, 0.36390868, -0.36308664, -0.010029068, -0.27397126, 0.06675105, 0.07950608, -0.11110028, 0.10758905) * inp_3_1_1; result2 += M4(-0.0076967604, -0.0063465773, 0.07495261, 0.060014434, -0.21927468, -0.16195607, 0.004462335, 0.1700076, 0.18680118, -0.08768889, 0.08070937, -0.13451485, 0.027677529, -0.37147608, 0.0492793, 0.20355256) * inp_3_2_1; result2 += M4(0.28913042, -0.1857833, -0.19982226, 0.0047614267, -0.040002443, 0.013629103, 0.14590615, -0.1875878, 0.13732162, -0.0996179, 0.08447261, 0.059818078, 0.12112791, 0.04442736, -0.036067992, 0.009009383) * inp_3_0_2; result2 += M4(0.074754044, -0.24406068, 0.5842961, 0.01011099, 0.06859522, 0.106121376, 0.20751011, -0.078184746, 0.06569143, -0.05616781, 0.43368873, -0.18342853, 0.0648663, 0.08838746, -2.5457027, -0.032766283) * inp_3_1_2; result2 += M4(-0.016084341, 0.049012687, -0.0840192, -0.009945125, -0.13082097, -0.026519094, 0.22928171, 0.109046146, 0.1913399, -0.06577869, 0.05957043, -0.111836284, 0.21928439, -0.044628173, 0.007977876, -0.08064955) * inp_3_2_2; result3 += M4(0.07694559, -0.0138822915, 0.06503231, -0.03360788, -0.07092503, -0.00048928615, -0.026823722, -0.02658003, 0.02215056, 0.031604793, 0.04115292, 0.040732138, -0.008415167, 0.054748695, 0.03447441, 0.018476207) * inp_3_0_0; result3 += M4(-0.036803186, -0.05286142, 0.007957323, -0.017383374, 0.17671868, 0.27140468, 0.13692562, -0.012053789, 0.10316045, -0.013202491, -0.016275119, 0.01879904, 
-0.062531106, 0.029337812, -0.078415155, -0.0249412) * inp_3_1_0; result3 += M4(0.018963913, -0.010214533, 0.018063482, 0.031577088, 0.05999718, -0.113296494, -0.09365664, 0.26440683, 0.08826179, 0.016142845, 0.0216411, -0.101425715, 0.009847335, -0.008299357, 0.074420415, 0.011775249) * inp_3_2_0; result3 += M4(-0.39081585, -0.0838673, 0.27916974, -0.07050347, -0.020497013, -0.1974258, -0.11615982, -0.04604486, 0.0473447, -0.029950554, 0.007203267, 0.039071508, 0.05709263, 0.024631454, 0.057777587, 0.040751096) * inp_3_0_1; result3 += M4(0.03844488, 0.018994762, 0.016261842, 0.51800346, -0.06746922, 0.32197532, 0.020198246, -0.1261948, -0.047454923, 0.19068927, -0.058833476, 0.17420405, -0.035010174, -0.24515375, -0.049961384, 0.42633954) * inp_3_1_1; result3 += M4(-0.10035564, 0.013695476, 0.05426191, -0.014002882, -0.0910872, -0.24419665, -0.034756966, -0.2550853, 0.13050632, 0.048386525, 0.08481854, -0.015868377, -0.7629353, 0.07594038, 0.058023147, -0.25982517) * inp_3_2_1; result3 += M4(0.053038094, 0.3456283, -0.028815135, -0.021221591, -0.030001303, 0.26908705, -0.03715217, -0.093417, -0.0061158077, 0.46648175, 0.06069643, 0.042290237, -0.022208512, -0.022681322, 0.011513289, 0.07366402) * inp_3_0_2; result3 += M4(-0.023000399, 1.1273052, -0.004522217, 0.22401826, 0.00028205267, -0.82191133, -0.08815093, -0.10096036, 0.052339103, -0.494361, 0.014254294, 0.05702493, -0.053227764, -1.9380327, 0.044443835, 0.10758067) * inp_3_1_2; result3 += M4(0.052999526, 0.40421763, -0.012616779, 0.002641059, 0.12748745, 0.37926105, 0.022284118, -0.1623692, -0.021195088, -0.23581114, 0.013012999, 0.055029694, -0.064298704, -2.3030765, 0.092065066, 0.03497193) * inp_3_2_2; const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2); imageStore(out_image, output_base + ivec2(0, 0), max(result0, V4(0.0))); imageStore(out_image, output_base + ivec2(1, 0), max(result1, V4(0.0))); imageStore(out_image, output_base + ivec2(0, 1), max(result2, V4(0.0))); 
imageStore(out_image, output_base + ivec2(1, 1), max(result3, V4(0.0))); } //!DESC ArtCNN C4F16 (Conv2D-3-ReLU) //!COMPUTE 24 32 12 16 //!HOOK LUMA //!BIND conv2d_2 //!SAVE conv2d_3 //!WIDTH LUMA.w 2.0 * //!HEIGHT LUMA.h 2.0 * //!COMPONENTS 4 //!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > * #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable #ifdef GL_EXT_shader_explicit_arithmetic_types_float16 # define V4 f16vec4 # define M4 f16mat4 # define F float16_t #else # define V4 vec4 # define M4 mat4 # define F float #endif const ivec2 ksize = ivec2(3, 3); const ivec2 offset = ksize / 2; const ivec2 wg_size = ivec2(gl_WorkGroupSize); const ivec2 isize = wg_size + ksize - 1; shared V4 inp[4][isize.y][isize.x]; void hook() { const uvec2 local_xy = gl_LocalInvocationID.xy; ivec2 base = ivec2(gl_WorkGroupID) * wg_size; for (uint y = local_xy.y; y < isize.y; y += wg_size.y) { for (uint x = local_xy.x; x < isize.x; x += wg_size.x) { const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2); inp[0][y][x] = V4(conv2d_2_mul * texelFetch(conv2d_2_raw, input_base + ivec2(0, 0), 0)); inp[1][y][x] = V4(conv2d_2_mul * texelFetch(conv2d_2_raw, input_base + ivec2(1, 0), 0)); inp[2][y][x] = V4(conv2d_2_mul * texelFetch(conv2d_2_raw, input_base + ivec2(0, 1), 0)); inp[3][y][x] = V4(conv2d_2_mul * texelFetch(conv2d_2_raw, input_base + ivec2(1, 1), 0)); } } barrier(); V4 result0 = V4(0.014308364, -0.031445663, -0.01849288, -0.004246903); V4 result1 = V4(-0.021231862, 0.018275987, 0.004760984, 0.033462685); V4 result2 = V4(0.0017615217, 0.012336949, -0.022513494, 0.009208594); V4 result3 = V4(-0.0013368673, 0.0046511, 0.021403618, 0.012234549); const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0]; const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1]; const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2]; const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0]; const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1]; const V4 
inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2]; const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0]; const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1]; const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.025315678, -0.40740538, 0.37080577, 0.17917906, 0.1517135, 0.09999762, 0.03209811, -0.025414864, -0.0072085187, -0.044233754, -0.14142239, 0.021514736, 0.074731536, 0.06694275, 0.08969647, 0.01626352) * inp_0_0_0; result0 += M4(-0.4784662, -0.44962025, 0.16025928, -0.112756014, 0.098072365, -0.16340493, -0.10911931, -0.057525925, -0.07066941, 0.04618561, -0.3869227, 0.07643021, 0.002182261, -0.29465193, 0.12064611, -0.15072991) * inp_0_1_0; result0 += M4(-0.3007937, 0.0072757923, -0.07775808, 0.075630665, 0.07475434, -0.030880397, 0.017956393, 0.030480307, -0.07537768, -0.0060714884, -0.10579664, -0.041868184, -0.029362228, 0.0019552058, 0.0033949236, -0.042928576) * inp_0_2_0; result0 += M4(-0.055378314, 0.11835048, -0.21005718, -0.067133605, 0.037751865, 0.111293666, -0.2152327, 0.35863844, 0.09041399, -0.13204306, -0.023983477, 0.035911985, -0.1136856, -0.101052254, -0.026796436, 0.17415252) * inp_0_0_1; result0 += M4(-0.19136935, 0.25973448, -0.35633188, -0.13955162, -0.022144493, -0.12332462, 0.12980436, -0.06716298, -0.226214, -0.017355867, 0.31573844, -0.07952507, 0.06953637, 0.3504291, 0.3310475, -0.4188637) * inp_0_1_1; result0 += M4(0.07861268, -0.011944142, -0.06872131, 0.06526956, -0.23126051, -0.0787915, 0.3074996, -0.1560054, -0.0515472, -0.15933825, 0.008892285, -0.032828208, -0.04228462, 0.19568804, -0.056245092, 0.2529376) * inp_0_2_1; result0 += M4(0.0092896065, -0.0029096454, -0.23034982, 0.011114423, -0.1106547, -0.15602478, 0.2677796, 0.19269039, -0.031739205, -0.03228303, 0.011086476, -0.05332459, -0.22346567, 0.08080637, -0.020646531, 0.08239571) * inp_0_0_2; result0 += M4(-0.15126343, 0.022163287, -0.15783563, -0.025882605, 0.15493815, 0.1494604, -0.034188353, 0.047689065, 0.14822488, 
-0.010189335, 0.18554454, 0.030036429, 0.024229797, 0.06672721, -0.27624926, -0.0014910502) * inp_0_1_2; result0 += M4(-0.11723486, -0.024278458, -0.0918668, -0.05912352, 0.0804538, 0.22284164, 0.082265265, -0.086509205, 0.077792905, 0.06799491, 0.10968278, 0.07208801, -0.10925173, -0.19539174, -0.20790318, -0.08770796) * inp_0_2_2; result1 += M4(0.3236074, -0.016813854, -0.05796897, -0.07314582, -0.06281415, 0.012611842, 0.016730722, 0.11916892, 0.054947067, -0.024866354, -0.007610447, -0.11161885, 0.04581396, 0.009688132, 0.021315157, -0.08097858) * inp_0_0_0; result1 += M4(0.1599916, -0.031528797, 0.0126992585, 0.016889824, 0.11132726, -0.06296075, 0.00051726383, -0.3424913, -0.004174934, -6.6618624e-05, -0.012995042, 0.005965193, -0.06023162, 0.003182649, 0.021039585, -0.11947315) * inp_0_1_0; result1 += M4(0.027892213, -0.00062689267, -0.01636723, -2.2716187e-05, 0.25023237, -0.007830497, 0.021714738, -0.56807584, -0.053948745, -0.024804676, 0.045291945, -0.13231511, 0.07675576, 0.06605667, -0.032000583, 0.10833098) * inp_0_2_0; result1 += M4(-0.36363825, 0.021935955, -0.18368474, 0.028652018, 0.14966221, 0.027848009, 0.072001524, 0.3989147, -0.060279924, 0.025458539, -0.0016928725, 0.060513448, -0.030946735, 0.116279155, -0.027582984, 0.07756251) * inp_0_0_1; result1 += M4(-0.23284562, 0.14024308, 0.062349986, -0.12310892, -0.19432355, -0.08142183, -0.061544392, -0.45819297, -0.037947427, 0.39792922, 0.07392991, -0.088718906, 0.15805514, 0.30931425, -0.1016847, -0.078503326) * inp_0_1_1; result1 += M4(0.02309257, -0.024762606, 0.008150015, 0.023823071, -0.15807371, 0.03965886, -0.016814513, -0.07145885, -0.20561801, 0.02365692, 0.0007521815, 0.1441513, 0.14683372, -0.046197027, -0.008067438, -0.36100286) * inp_0_2_1; result1 += M4(-0.1421229, -0.099885836, -0.09427238, 0.1337728, -0.0066758767, -0.009484399, 0.06744897, 0.30264565, -0.03263222, 0.03592801, 0.10090736, 0.028849022, -0.041092165, -0.05571495, -0.117078304, -0.2493043) * inp_0_0_2; result1 += 
M4(0.030493004, 0.013943649, 0.008176111, -0.048232153, -0.029120184, 0.0832065, 0.012535422, -0.06679524, 0.072213255, 0.047026567, 0.0739274, 0.021540673, -0.10233502, 0.04503862, -0.16855547, 0.19900726) * inp_0_1_2; result1 += M4(-0.10335507, 0.03536097, 0.0033068694, 0.0032172517, 0.049036283, 0.07101889, -0.10331261, 0.14447233, 0.05273494, -0.014971053, -0.08585235, 0.11687278, -0.077435136, 0.03600481, 0.15392411, -0.22504471) * inp_0_2_2; result2 += M4(-0.19788808, 0.088607766, 0.03288371, 0.018204056, -0.024293097, -0.008095376, 0.03442426, -0.036480363, -0.008784721, 0.08859438, -0.10649091, 0.030977005, 0.016547365, -0.0022618754, 0.0324507, -0.017606767) * inp_0_0_0; result2 += M4(-0.04685454, -0.21285146, -0.010454878, -0.057078585, 0.065119885, 0.10927382, -0.05321152, 0.060907405, 0.08013327, -0.0073412405, -0.20529112, -0.08758012, 0.09224284, -0.067262664, 0.1502062, 0.05410036) * inp_0_1_0; result2 += M4(-0.041660745, 0.0048408103, 0.003651176, 0.071307175, 0.01425864, 0.09917961, -0.05065944, 0.04616355, -0.106611334, 0.08681873, -0.07359776, -0.09394259, 0.11695499, -0.1402854, 0.061297946, -0.045418613) * inp_0_2_0; result2 += M4(-0.23553102, 0.5484784, -0.09534771, -0.02045959, 0.13755015, -0.11109039, -0.06437182, -0.2358323, 0.010125626, -0.11606748, -0.08721418, 0.07951246, 0.14079992, -0.080286875, 0.03190584, -0.11406279) * inp_0_0_1; result2 += M4(-0.10584075, -0.2533913, 0.19931902, -0.06487168, -0.1717319, 0.030989863, 0.03765032, -0.23701997, 0.392678, -0.20434664, -0.011459706, -0.012392768, -0.19392636, -0.19896725, 0.5523695, 0.10076211) * inp_0_1_1; result2 += M4(-0.07930697, 0.012467926, -0.018473605, 0.049829062, 0.041304592, -0.20307006, 0.061710116, -0.1607002, 0.05850921, -0.054579165, 0.05506371, 0.018534066, -0.05382266, 0.19656838, -0.14599052, 0.10722883) * inp_0_2_1; result2 += M4(-0.14471711, -0.089903586, -0.024194038, -0.008216228, 0.21955849, -0.3950722, 0.05865401, -0.15680425, 0.0034217027, 0.09524851, 
-0.014997871, -0.01323649, 0.0042688143, 0.033086605, -0.03602252, 0.14105399) * inp_0_0_2; result2 += M4(0.020014875, -0.11133592, -0.023629094, -0.0709405, 0.07574613, 0.4842027, -0.016238315, -0.06935021, 0.048983708, -0.02455697, 0.023512363, 0.032327037, -0.12747441, -0.137746, -0.0041232975, -0.29301938) * inp_0_1_2; result2 += M4(-0.04711032, -0.019662969, 0.020692779, 0.01689221, 0.030064257, -0.032970324, 0.093564175, -0.06074772, -0.0065473514, 0.10435069, -0.051301178, 0.07506068, -0.092831016, -0.0098741185, 0.092322186, -0.09218845) * inp_0_2_2; result3 += M4(-0.109521486, -0.25843716, 0.051019795, 0.008796125, 0.006710635, -0.0049244356, -0.028464736, -0.020576421, 0.018126301, -0.03288581, -0.059816107, 0.012544033, 0.019033715, 0.036934555, 0.03997806, 0.011696228) * inp_0_0_0; result3 += M4(0.15896165, -0.40192378, 0.020870406, -0.040908355, -0.06158074, 0.04799475, 0.02708582, 0.025623411, 0.031087726, 0.15594003, 0.061994012, -0.055818196, 0.009436111, 0.04553281, -0.009773234, 0.05364127) * inp_0_1_0; result3 += M4(0.05062757, -0.34977517, -0.017327579, 0.029586911, -0.015337377, 0.1657842, -0.023074357, -0.48225418, -0.101821445, -0.07600293, -0.005563617, -0.5261255, 0.00761482, -0.058566544, 0.05186853, 0.04371315) * inp_0_2_0; result3 += M4(-0.06422451, -0.6366767, -0.036370903, -0.0831035, 0.0374723, 0.38192326, 0.010392014, -0.08228988, 0.0633136, -0.048369046, 0.06295746, 0.0043660635, 0.037951868, -0.08372121, 0.055318434, -0.05734989) * inp_0_0_1; result3 += M4(0.18938878, -0.58693033, 0.11619453, -0.045211863, -0.081650935, 0.30361742, -0.12818241, -0.10964245, -0.21211484, -0.2671186, 0.14642583, 0.10737925, 0.0061715045, -0.38273, -0.22397554, -0.04302644) * inp_0_1_1; result3 += M4(-0.016349845, -0.21763504, -0.0010806874, -0.0011352341, -0.1379133, 0.19977011, -0.019010192, 0.15351702, -0.049592014, -0.015350318, 0.075927876, 0.17186968, 0.12509024, 0.02728859, -0.037378654, -0.32351947) * inp_0_2_1; result3 += M4(-0.019764764, 
-0.31119177, -0.06767115, -0.014068403, 0.03635759, 0.14193599, 0.0015609096, -0.0077975956, 0.009449523, -0.01775413, 0.062425796, 0.026602777, -0.025830995, -0.10380951, -0.02119342, 0.0054662493) * inp_0_0_2; result3 += M4(0.046277538, -0.45933232, 0.06593023, -0.03638144, -0.07667149, 0.29901657, -0.0016867243, 0.11688226, 0.031845666, -0.027253512, 0.07584758, 0.08993896, -0.060812075, -0.04589073, -0.21525094, -0.047655407) * inp_0_1_2; result3 += M4(0.008118003, -0.31232572, -0.01947948, -0.028818972, 0.01954803, 0.14400195, 0.07366684, 0.08039247, -0.039747268, -0.07018961, -0.006076149, 0.010897608, 0.0013431206, -0.038716212, 0.033262603, -0.06467397) * inp_0_2_2; const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0]; const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1]; const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2]; const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0]; const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1]; const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2]; const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0]; const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1]; const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.034586944, 0.051405124, 0.024543753, 0.09663699, 0.21466514, -0.14363779, 0.091029935, 0.06438413, -0.07787725, 0.02310036, -0.08526669, -0.06898668, -0.07615313, 0.1859357, 0.051052812, 0.03592223) * inp_1_0_0; result0 += M4(0.14242817, -0.12330711, 0.26888746, 0.027858777, -0.19174838, -0.3025271, 0.21068037, -0.22111407, 0.039243873, 0.32096145, -0.19555481, 0.041925013, -0.081656374, 0.042261768, 0.4310771, -0.17600678) * inp_1_1_0; result0 += M4(0.07237473, -0.019333057, 0.01465501, 0.0103286505, -0.0033088874, 0.0049798666, 0.005620796, 0.050308306, 0.095135905, 0.11874092, -0.089677125, 0.06639713, 0.008767325, 0.020067416, -0.054253392, -0.02636066) * inp_1_2_0; result0 += M4(0.12108157, -0.08878159, 0.20186365, -0.013874086, 
-0.41484395, -0.14844953, 0.0738829, -0.045337558, 0.042086396, 0.059161253, -0.012553872, -0.23520784, -0.12646332, 0.26784688, -0.20803319, -0.03942921) * inp_1_0_1; result0 += M4(-0.23782037, -0.4498384, -0.2894948, -0.11137584, -0.41319975, -0.17482656, -0.117727414, 0.04227422, 0.44053125, -0.040965743, -0.07687903, 0.1572281, -0.14606549, 0.5611193, -0.017074287, -0.26588884) * inp_1_1_1; result0 += M4(0.13518186, -0.1952343, -0.033405837, 0.15649372, 0.014096296, -0.1471687, 0.022654844, -0.16781017, 0.1897826, 0.05537809, 0.011526008, 0.16989169, 0.25440302, 0.02105957, 0.07173544, 0.13849147) * inp_1_2_1; result0 += M4(0.12111377, 0.06486296, -0.054877255, 0.013369709, -0.26179752, 0.008404512, -0.291739, 0.09870817, 0.30430415, -0.02468248, 0.09765464, -0.073647045, 0.003215954, -0.17846975, 0.11312282, 0.061767615) * inp_1_0_2; result0 += M4(-0.0767532, 0.46677387, -0.17212266, -0.03160566, -0.3810842, 0.056070074, -0.25618377, -0.100580625, -0.12561269, -0.016203389, 0.3710484, 0.00074308756, 0.10880074, -0.21513747, -0.21955177, -0.10800783) * inp_1_1_2; result0 += M4(0.07202282, 0.08571014, 0.04075649, 0.024679877, 0.11339893, 0.10422508, -0.014214932, -0.19761975, 0.05781747, -0.18774833, 0.14175606, 0.12277776, -0.14729902, 0.0993602, 0.03688781, -0.06296111) * inp_1_2_2; result1 += M4(0.061538264, 0.060805965, 0.017201789, 0.22006647, -0.01602865, 0.035268907, -0.00068671186, -0.18142733, -0.0058686594, 0.04291206, 0.0014080717, 0.13177212, -0.0016620886, 0.09081882, -0.031675898, -0.29883888) * inp_1_0_0; result1 += M4(-0.15646641, 0.0058149397, -0.034551397, -0.087699234, -0.0009914644, -1.4757978e-05, 0.00055921706, 0.23698746, -0.15453093, 0.06698371, -0.014193776, 0.055897992, -0.19766432, 0.046747115, 0.03361082, -0.63573986) * inp_1_1_0; result1 += M4(0.07094494, 0.00023815216, 0.006596921, -0.05585263, 0.07675021, -0.06228464, 0.014950143, -0.0021290332, -0.12428363, 0.016943645, -0.0096276365, -0.022048702, -0.06971582, -0.029114002, 
0.049153633, -0.39115208) * inp_1_2_0; result1 += M4(0.049078394, 0.06519899, 0.041337896, 0.18306711, 0.06364234, -0.036350865, -0.082766965, -0.04908475, -0.03600516, 0.13349976, 0.075641125, 0.09301542, 0.098319165, -0.111341886, -0.11690513, -0.30515233) * inp_1_0_1; result1 += M4(-0.047191925, 0.122746356, -0.0033969765, 0.08131301, 0.1518897, 0.02669907, 0.033575322, -0.511831, -0.18935362, 0.09478832, 0.018412145, 0.27864313, 0.4335526, 0.38276324, 0.056418765, 0.092538744) * inp_1_1_1; result1 += M4(0.06391543, -0.032844223, -0.0015351484, 0.08773329, 0.0818433, -0.00886031, 0.006732896, -0.004721323, -0.0030527106, 0.0014216377, -0.04981653, 0.17145506, 0.003415348, 0.04492702, 0.032660168, -2.0050154) * inp_1_2_1; result1 += M4(-0.08958046, 0.10260106, 0.11284484, 0.19449843, -0.15825003, -0.022389483, -0.1325153, -0.16857241, 0.16755177, 0.031064708, 0.1567697, 0.16521265, -0.065289006, -0.022755811, 0.03485217, 0.09884881) * inp_1_0_2; result1 += M4(-0.109482475, 0.013460136, -0.04917644, -0.15271996, -0.2583299, 0.060626, -0.041091472, -0.15900281, 0.47309366, -0.03839146, -0.0045616627, 0.0008328471, 0.0082431855, -0.21283317, -0.18913719, 0.2243721) * inp_1_1_2; result1 += M4(-0.030455546, -0.020884665, 0.06928861, -0.02054186, -0.051363032, -0.028426047, 0.032410964, 0.27805674, 0.09577153, 0.0018001996, -0.009598458, 0.24205303, 0.06581605, 0.032396894, 0.15634531, -0.062486712) * inp_1_2_2; result2 += M4(0.04517026, -0.015449355, 0.030736623, -0.064001635, 0.10251629, -0.010003219, 0.00246948, -0.041401625, -0.07452645, 0.055657007, 0.017119868, 0.00057093386, 0.076428145, -0.13702212, 0.041210357, -0.097048655) * inp_1_0_0; result2 += M4(-0.11847643, 0.07740604, 0.04646375, -0.006719404, 0.078692794, 0.018933047, -0.03411219, 0.005511014, -0.12282886, 0.0056976853, -0.0050732824, -0.020660818, -0.031830277, 0.030043423, 0.056548044, -0.090381294) * inp_1_1_0; result2 += M4(-0.033105094, -0.0078620855, -0.022709917, 0.019003244, 0.024807753, 
0.052204266, -0.071791224, 0.011485747, -0.07369586, 0.0137782805, 0.003582809, 0.035403106, -0.01405195, 0.07256009, 0.011241367, -0.06909385) * inp_1_2_0; result2 += M4(0.026971996, 0.006361849, 0.14840193, 0.11840187, -0.23391397, 0.19595902, 0.021233503, -0.11863109, -0.02861322, -0.0805317, 0.19878447, 0.04877218, -0.03260752, 0.47443804, -0.0007973998, -0.083458565) * inp_1_0_1; result2 += M4(0.10141363, 0.056519937, 0.09495047, -0.083211236, -0.33066672, 0.31982478, 0.06656332, -0.5255531, 0.39907596, -0.04551523, 0.076361425, 0.515442, 0.2894429, -0.026761035, 0.15433668, -0.22964536) * inp_1_1_1; result2 += M4(-0.052999362, 0.052562866, -0.035938874, 0.032367133, -0.081125945, -0.008548732, -0.02534536, 0.09177157, -0.015444048, 0.12723202, -0.07120179, -0.0022531084, -0.018742096, 0.042202547, 0.030112537, 0.16257262) * inp_1_2_1; result2 += M4(0.007041716, 0.018217154, 0.08307236, -0.04520616, -0.31561956, 0.23123385, -0.067681044, -0.12643862, 0.010874087, -0.10742594, 0.030002201, -0.14344977, 0.10406159, -0.20533821, -0.030862322, -0.10753616) * inp_1_0_2; result2 += M4(0.11211696, -0.11703306, -0.029693123, 0.07388003, -0.2088053, 0.15857096, 0.047811363, -0.19972345, 0.03004797, -7.211207e-05, 0.011323185, 0.053247977, -0.11488151, -0.21737544, -0.09646544, -0.32760012) * inp_1_1_2; result2 += M4(0.023682857, 0.028017636, 3.7979324e-05, 0.044192187, 0.06769071, 0.062285192, -0.00054911565, 0.044016127, -0.054126956, 0.073501885, 0.0038514957, 0.14338864, 0.07184208, 0.021787634, 0.008975992, -0.038967796) * inp_1_2_2; result3 += M4(0.0076448624, 0.073063046, -0.018112276, 0.044603255, -0.028443448, 0.042257074, 0.033456415, 0.10408893, 0.013544821, -0.10530665, -0.013452262, 0.024509164, -0.032966062, 0.2765319, 0.06838193, -0.008048515) * inp_1_0_0; result3 += M4(0.01256106, 0.0046958136, 0.03241983, 0.055210363, 0.07731245, -0.07009144, -0.048668608, 0.05429069, 0.07470501, 0.039248727, 0.03182757, -0.06734372, 0.1485473, -0.09281201, 
-0.054269433, 0.03705732) * inp_1_1_0; result3 += M4(0.016695606, 0.058716543, -0.022234729, 0.010488249, 0.019232022, -0.044709843, -0.032776598, 0.12039033, -0.017063448, -0.09488618, -0.00074972864, -0.18022363, 0.035581883, -0.12771232, 0.037610672, -0.02314014) * inp_1_2_0; result3 += M4(0.033441305, -0.17919269, 0.071414076, -0.040231004, -0.051545292, 0.3152309, 0.032920167, -0.050826155, 0.030751579, -0.043526605, 0.101936825, 0.10663154, -0.010965119, 0.23196171, -0.03446115, 0.012430032) * inp_1_0_1; result3 += M4(-0.039667293, 0.28457764, 0.13565247, 0.021205531, 0.05632608, -0.18567143, 0.16571477, -0.23085211, 0.076178744, -0.04070206, 0.11625753, 0.11961359, 0.09332873, 0.061907627, 0.30030668, 0.10909142) * inp_1_1_1; result3 += M4(0.108230695, 0.15231377, -0.0005546767, -0.19747706, 0.06423663, -0.031372365, -0.037357267, 0.15446967, 0.01659812, 0.0051198537, -0.027291223, -0.022681922, 0.057102717, 0.13700908, -0.032580305, -0.11682838) * inp_1_2_1; result3 += M4(-0.0029815636, 0.13884383, 0.04761928, 0.059312716, -0.008409305, -0.9101977, -0.03828398, -0.07224407, -0.0005499478, 0.22700396, -0.007955575, 0.014039278, 0.00048419705, 0.14805353, 0.01756024, 0.08449175) * inp_1_0_2; result3 += M4(-0.0013124654, 0.08395497, -0.044070862, 0.08525564, -0.0057046493, 0.1908161, -0.045428224, 0.05620147, 0.03192862, 0.23222342, 0.07045153, 0.059786994, -0.017609006, 0.22580697, -0.095051855, -0.06213206) * inp_1_1_2; result3 += M4(-0.032118272, -0.05992314, 0.0014346216, -0.0144403465, -0.0040735975, -0.07336036, -0.019177906, -0.033247244, 0.036743563, 0.05132299, 0.023929443, 0.0894065, -0.02746859, -0.0016096028, 0.023279605, -0.03991766) * inp_1_2_2; const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0]; const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1]; const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2]; const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0]; const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1]; 
const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2]; const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0]; const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1]; const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.0715746, -0.09631244, 0.050157297, -0.08048454, -0.0025352512, 0.16482176, -0.106414065, 0.083849244, 0.12749393, 0.20426564, 0.29415998, -0.037008047, -0.26012027, -0.14854887, 0.20277677, 0.31178087) * inp_2_0_0; result0 += M4(-0.05076258, -0.13740575, 0.15595411, -0.053362656, -0.066203654, -0.16828513, 0.081790805, -0.017918047, -0.41374892, -0.52676713, -1.0831323, 0.22619742, 0.12750734, -0.14390856, -0.22360946, -0.5096736) * inp_2_1_0; result0 += M4(-0.10955963, -0.24669066, 0.15493616, -0.11090137, -0.027859153, 0.03351683, 0.09520772, -0.107278876, -0.45327467, 0.0021158315, -0.11707372, -0.28413826, -0.15181288, -0.030790092, 0.18309823, -0.26073602) * inp_2_2_0; result0 += M4(-0.0028697196, 0.06061011, -0.056756634, -0.036915377, -0.28824973, 0.012138959, -0.12313946, 0.10299592, 0.23378398, 0.049730666, -0.07091768, -0.01715781, -0.018962007, 0.16162728, 0.027316753, -0.008936573) * inp_2_0_1; result0 += M4(0.025687302, 0.059557397, -0.20743646, 0.18490358, -0.3239818, 0.17850724, -0.08651508, 0.14011745, -0.66113037, 0.11846311, -0.061496705, 0.13422967, -0.005783129, -0.15430583, -0.26381627, 0.054521244) * inp_2_1_1; result0 += M4(-0.015021453, -0.19950967, -0.27221996, -0.28598326, 0.110668495, 0.10796349, 0.05700929, -0.09480617, 0.055270392, 0.12745835, 0.116460495, -0.17917821, 0.12730812, -0.0012037748, 0.32492042, 0.08095944) * inp_2_2_1; result0 += M4(0.06407335, 0.030303145, -0.08825665, -0.0035974658, 0.08393234, -0.009050897, 0.21445066, 0.043095548, 0.1661051, -0.0459125, 0.10902106, 0.039837647, -0.061346497, -0.009559113, -0.02383936, 0.0028487262) * inp_2_0_2; result0 += M4(-0.1500397, 0.025134781, -0.10852966, -0.11559518, -0.18731914, 0.06851372, -0.066781044, -0.10385313, 
0.17443936, -0.021997442, -0.13519152, -0.014186667, -0.18113253, 0.07363012, 0.12357896, 0.11021419) * inp_2_1_2; result0 += M4(-0.005192235, 0.044567242, -0.011068163, -0.018504843, -0.04650239, -0.009309463, -0.07613297, -0.099593356, -0.029223667, -0.09067077, -0.029125804, -0.08720748, -0.004944248, 0.20919052, -0.051089592, -0.086393625) * inp_2_2_2; result1 += M4(-0.079497606, 0.033001993, -0.0433299, -0.49341437, 0.06402561, 0.027012901, -0.017186008, -0.026205903, 0.20847131, -0.17379077, -0.1460187, -0.3422193, -0.028837888, 0.14444311, 0.017298697, 0.16118568) * inp_2_0_0; result1 += M4(0.1362322, 0.0049404167, -0.0545727, 0.036812954, 0.029849706, -0.041181613, -0.011156492, -0.10626014, 0.4812216, -0.1561386, 0.13164558, -0.38440028, 0.09642372, -0.07784491, 0.004167074, -0.54048723) * inp_2_1_0; result1 += M4(0.22648239, -0.0008470028, 0.017957706, -0.018039277, -0.015313996, 0.014065825, 0.015366776, 0.10012267, 0.014650181, 0.018891001, -0.008666813, 0.10578885, -0.24156433, -0.06182133, -0.0048173345, 0.50967747) * inp_2_2_0; result1 += M4(0.04990316, 0.077742726, -0.07985242, -0.23526481, 0.010774825, -0.018184526, -0.032785896, -0.1857747, -0.23976691, -0.052705243, -0.35834587, 0.13117231, 0.01347248, -0.043906253, 0.032065123, -0.27442545) * inp_2_0_1; result1 += M4(-0.199354, -0.053708132, -0.076112315, -0.13419336, -0.08698115, -0.1992096, -0.0026194518, -0.49119824, -0.50951654, 0.10993369, 0.17314698, -0.43758488, -0.2935566, -0.011501036, -0.10976056, 0.4501387) * inp_2_1_1; result1 += M4(-0.21165654, 0.025460485, 0.055044506, -0.18715349, 0.26253015, 0.015346683, -0.0065055043, -0.1598433, -0.065605536, -0.025775231, -0.016590733, 0.31189066, 0.08281263, -0.0045969966, -0.13699894, -0.097940974) * inp_2_2_1; result1 += M4(-0.088196106, 0.03208788, -0.06850063, -0.18676445, 0.13442639, 0.06030712, -0.062297534, -0.12515818, 0.051921774, -0.071406096, -0.14802997, 0.037173603, -0.06478699, -0.04161509, -0.07285825, -0.009940458) * 
inp_2_0_2; result1 += M4(-0.13968326, -0.06019821, -0.0882567, -0.2698704, -0.09788051, -0.1485394, -0.085379414, 0.0072728456, 0.03298984, -0.074674346, -0.030420514, 0.17290293, 0.04263921, -0.11421748, -0.019576881, 0.2684657) * inp_2_1_2; result1 += M4(-0.1025962, -0.011311065, 0.01694238, -0.13532774, 0.0062958077, -0.058460772, 0.09474729, -0.17248796, 0.06978807, -0.030376935, 0.0025259845, 0.27620357, -0.04003609, 0.0008251506, 0.09974402, -0.19953759) * inp_2_2_2; result2 += M4(-0.0274369, -0.02785617, 0.111271836, -0.04777609, 0.022969048, -0.0103979325, -0.029450497, 0.071567275, -0.17947961, 0.3219096, -0.19316356, 0.06724893, 0.16150929, -0.17940994, 0.13669962, 0.0322664) * inp_2_0_0; result2 += M4(-0.063206606, -0.004748452, -0.047064275, -0.10338681, -0.03555205, 0.06408232, -0.11194569, 0.024295783, -0.5038483, 0.5358632, -0.025267152, -0.43657935, -0.11281369, 0.21069881, -0.0877243, 0.23032966) * inp_2_1_0; result2 += M4(0.09157177, -0.101289205, -0.0045268014, 0.007126031, 0.076024644, -0.011473527, -0.0014426362, -0.0772565, -0.066013575, 0.0573791, -0.057972565, -0.050329443, 0.13471925, -0.18278362, -0.124123216, -0.24296595) * inp_2_2_0; result2 += M4(0.0018280116, 0.017851619, 0.06933558, 0.2027289, 0.14181142, -0.0031327193, 0.10318157, 0.06581663, 0.1528262, 0.07430443, -0.02498459, -0.03398527, -0.121124454, 0.0130372215, -0.04157365, 0.13387349) * inp_2_0_1; result2 += M4(-0.2677744, -0.029308412, -0.07962408, -0.12204546, -0.29457334, 0.030129738, -0.26925707, -0.22190627, 0.110479586, 0.3287684, -0.14959732, 0.17304076, -0.04718754, -0.08847532, -0.1267441, -0.26355487) * inp_2_1_1; result2 += M4(-0.09800573, -0.14139035, 0.04140986, -0.18992579, 0.028541153, -0.0329216, 0.0016428916, 0.041346528, 0.009479444, 0.1538949, -0.016167583, 0.14781687, -0.20410268, -0.08003033, 0.058599915, 0.011661855) * inp_2_2_1; result2 += M4(-0.08390431, -0.015559658, 0.018878765, -0.12197927, 0.054793455, -0.03990255, -0.011211109, 0.036316004, 
-0.00909181, 0.01052049, -0.07726077, 0.0054688193, -0.058225293, 0.025337262, -0.054769665, 0.0038822803) * inp_2_0_2; result2 += M4(-0.042446077, -0.077310875, -0.015256182, 0.06345937, 0.073480725, 0.09065197, -0.0731339, 0.0723991, -0.0973175, -0.004725554, 0.00842376, -0.0057924674, 0.16431709, -0.041569393, -0.0024132037, 0.09127265) * inp_2_1_2; result2 += M4(0.103879355, -0.114007644, -0.036526263, -0.09562801, 0.026349826, -0.077657074, -0.0014605734, 0.020703465, 0.00056508253, -0.034128983, 0.00072431617, 0.0124235535, 0.119479515, 0.027588964, -0.040412117, -0.0019401097) * inp_2_2_2; result3 += M4(-0.0054999315, 0.054679673, -0.0046336288, 0.017204197, 0.01873656, 0.053969342, 0.0022390555, -0.079477236, -0.056740683, 0.061366238, -0.14898393, -0.009038732, 0.08329993, -0.08009324, 0.089084126, -0.052381206) * inp_2_0_0; result3 += M4(-0.07229899, -0.23929752, 0.045575928, -0.08491341, -0.037206728, -0.066858895, -0.009566649, -0.16403022, 0.070830956, 0.24849686, 0.022231523, -0.60494363, -0.12693338, 0.09039926, -0.061455548, 0.029116308) * inp_2_1_0; result3 += M4(0.14467773, 0.011604675, -0.011593841, 0.3062551, 0.014567709, 0.07741014, -0.012507825, 0.10407626, 0.06588569, 0.2327419, -0.021636222, 0.32575372, 0.0069890674, -0.019714063, -0.039952785, -0.15029353) * inp_2_2_0; result3 += M4(-0.016747722, -0.057078633, 0.032135896, 0.03921812, 0.01964165, -0.041367184, 0.034990914, 0.03307513, -0.008712541, 0.36230156, -0.009524529, -0.013199496, -0.06960267, 0.11308598, -0.025458774, -0.016773175) * inp_2_0_1; result3 += M4(0.10539831, 0.06910462, 0.10081203, -0.25462282, -0.051628787, 0.26734698, -0.09425403, -0.10206043, 0.0900536, 0.18957601, 0.024502385, 0.017249644, 0.013259816, 0.3805416, 0.09170492, -0.11961444) * inp_2_1_1; result3 += M4(0.03674796, -0.20332646, -0.0019113279, -0.049671765, 0.12674725, 0.016929649, -0.019033445, -0.12554397, 0.057757977, 0.0079718605, -0.03455191, 0.012654533, 0.10924967, 0.13866869, -0.06353784, 
-0.15997352) * inp_2_2_1; result3 += M4(-0.032977168, -0.008423808, 0.0131768845, -0.017573318, 0.0020663647, 0.05378569, -0.009707578, 0.01458635, -0.039570022, 0.26450455, -0.09522793, -0.0082146, 0.03125363, -0.0039754794, 0.0103347115, -0.018126415) * inp_2_0_2; result3 += M4(0.053846017, -0.2637847, 0.034869306, -0.031385496, -0.049894933, 0.11334788, -0.050957955, -0.07865399, 0.0110876765, 0.19674455, 0.02882469, -0.008416688, 0.0094687035, -0.20319644, -0.11101057, -0.08230348) * inp_2_1_2; result3 += M4(0.0044784476, 0.023611512, -0.042547744, 0.0007483636, 0.05025809, 0.14764841, 0.00044558995, 0.04103793, 0.048046812, 0.062373303, -0.014592626, 0.07766575, -0.0020092498, -0.0069300635, 0.03421132, -0.16338684) * inp_2_2_2; const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0]; const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1]; const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2]; const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0]; const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1]; const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2]; const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0]; const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1]; const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.0020083024, -0.59463024, 0.13003162, 0.27622712, 0.51078737, -0.19404446, 0.12455177, 0.27357444, -0.00015589895, 0.08237507, 0.060955554, -0.013423829, -0.13043714, -0.26090437, -0.0061712246, 0.057619814) * inp_3_0_0; result0 += M4(0.22813018, -0.16819888, 0.12644678, -0.06749888, 0.6929881, 0.06584952, 0.2152027, -0.0030935707, 0.03527825, 0.07355563, 0.20881042, -0.1287684, 0.006924962, -0.3719862, -0.036988992, 0.20827514) * inp_3_1_0; result0 += M4(0.049787976, -0.1781383, 0.054748017, 0.003178183, 0.44286513, -0.21408647, 0.2594575, -0.7740605, 0.033387452, 0.021950949, -0.022106573, -0.0067378515, 0.13417983, -0.11796911, -0.031810988, 0.1171123) * inp_3_2_0; result0 += 
M4(-0.18528815, 0.35193694, 0.11649766, -0.39793167, 0.08550548, -0.007938471, 0.20756447, -0.0918376, -0.1510857, -0.023432646, -0.12200965, 0.07017334, 0.26202187, 0.2199531, 0.21588492, -0.1368237) * inp_3_0_1; result0 += M4(-0.44707954, 0.19176868, 0.17061797, 0.18040551, -0.13039091, -0.10258424, -0.16008896, 0.037404172, 0.117038585, -0.17250147, -0.1328998, 0.052930426, -0.44167283, -0.13420883, -0.23059113, 0.054577902) * inp_3_1_1; result0 += M4(-0.12896018, 0.0633688, 0.034150008, -0.12148736, -0.004690631, 0.04905833, 0.27947712, 0.060562916, -0.03636501, 0.10707534, 0.055300053, -0.08954744, 0.051630456, 0.00094299554, 0.061553795, -0.0014282514) * inp_3_2_1; result0 += M4(-0.14921282, -0.45711362, -0.957326, 0.26230615, 0.037424088, -0.033945464, 0.114703685, -0.0057828487, -0.08853224, 0.034291565, 0.032943837, -0.005019218, 0.005135216, -0.12015115, -0.18748857, -0.05337296) * inp_3_0_2; result0 += M4(0.087564476, -0.1250068, -0.3363717, 0.18351421, 0.05174429, 0.049167316, -0.049760886, 0.007353514, -0.037329644, -0.016148722, -0.12699443, 0.014485368, 0.105171196, -0.31134298, -0.37899444, 0.07498702) * inp_3_1_2; result0 += M4(-0.020149319, 0.124999724, -0.12147029, -0.00096658675, 0.0832416, -0.08173243, 0.05076248, 0.052645613, -0.0073318128, 0.077881485, 0.11520081, -0.0016521994, -0.0037119072, 0.09793644, 0.061810687, 0.029145136) * inp_3_2_2; result1 += M4(-0.12472872, 0.07407384, -0.07791997, -0.7134498, 0.3457181, 0.018778412, -0.064711064, -2.43281, -0.054541327, 0.04039896, -0.008637337, 0.002681813, -0.0026259732, -0.11904751, 0.019719794, 0.19572537) * inp_3_0_0; result1 += M4(0.18888874, 0.020963661, -0.018200552, -0.032447737, -0.15910539, 0.47710067, -0.49904862, -1.4134532, -0.084259026, 0.08839054, 0.028906915, 0.104726486, 0.29923728, -0.08374105, 0.01574318, -0.13314894) * inp_3_1_0; result1 += M4(0.00032150888, -0.022052694, -0.015426221, 0.08341028, -0.21296762, 0.26484624, -0.1642875, -1.1250306, -0.085825145, 0.020342987, 
-0.004617357, 0.10114876, -0.013172022, -0.07820508, -0.004570691, 0.04852303) * inp_3_2_0; result1 += M4(-0.51067257, -0.017741462, -0.30039522, -1.2571963, -0.02236554, 0.025866626, 0.35474715, -0.11294425, 0.015856761, -0.15779603, 0.004964842, -0.10562984, -0.092869416, -0.09270884, 0.023389773, -0.1594219) * inp_3_0_1; result1 += M4(0.109542504, 0.13075843, -0.053791624, -0.2583805, -0.0003216644, 0.09596493, 0.57191336, 0.1930133, 0.36178964, -0.32451582, 0.076985724, -0.122095495, -0.3093755, -0.013410467, 0.11794235, -0.37326223) * inp_3_1_1; result1 += M4(0.09745649, 0.110358074, 0.026718691, -0.20241807, -0.03151634, 0.046703037, 0.13132267, 0.13586995, 0.0039782166, 0.023424538, -0.06189003, 0.09849016, 0.021251155, -0.027039757, -0.001076575, 0.1751954) * inp_3_2_1; result1 += M4(-0.9915245, -0.42759714, -0.6206918, 0.44665846, 0.07352433, -0.08387705, -0.03350734, 0.22847399, 0.026893489, -0.0028833593, -0.07794939, -0.1306795, -0.039633114, -0.055868, -0.061232213, 0.15494743) * inp_3_0_2; result1 += M4(-0.36546615, -0.16674288, 0.036561977, 0.16032474, 0.025758734, 0.07587018, 0.14069304, -0.0019422671, -0.22332664, 0.109017774, 0.10702005, 0.007700021, -0.42372864, -0.049412824, 0.15198228, -0.11751534) * inp_3_1_2; result1 += M4(-0.076892026, -0.10194194, 0.023829062, -0.0664451, 0.033672974, -0.024957536, -0.049628306, 0.05266212, 0.14750876, 0.012939363, -0.06411259, -0.03776985, -0.01884732, 0.03957383, -0.021288283, -0.044852577) * inp_3_2_2; result2 += M4(0.00025199563, 0.096507885, 0.035680834, 0.06834127, 0.31596154, 0.5993435, -0.13442408, -1.0987302, 0.033409167, -0.028566176, 0.06360105, -0.016831838, 0.07198026, 0.0160935, -0.09025124, 0.039986104) * inp_3_0_0; result2 += M4(0.27852255, -0.1456235, 0.026425557, -0.03960402, 0.99083656, -0.24289148, 0.255672, -0.7740539, 0.005261177, -0.058641613, 0.33040568, -0.09337607, 0.05810956, 0.07106933, -0.115775965, 0.17179202) * inp_3_1_0; result2 += M4(0.05257131, -0.021596745, 0.0068275137, 
0.046166256, 0.7031302, 0.034376085, -0.13304152, -0.29976267, 0.08550005, -0.0020981333, 0.09808359, 0.07445061, 0.061775465, 0.04428485, -0.06592481, 0.054875463) * inp_3_2_0; result2 += M4(-0.64612305, 1.2113901, 0.009581094, -0.8400584, 0.046426214, -0.09642886, -0.06831416, -0.07444592, -0.019154096, 0.065491885, -0.028710717, -0.069172874, -0.14328499, 0.17614238, -0.14039895, 0.049561076) * inp_3_0_1; result2 += M4(-0.27135545, 0.26022336, 0.008533434, -0.13182087, 0.06728565, 0.22086637, 0.06302549, 0.13521333, -0.3080801, -0.38079894, 0.20361297, -0.11221058, -0.20414463, 0.050636705, -0.022766195, -0.38828018) * inp_3_1_1; result2 += M4(-0.15456447, -0.03646629, 0.10671977, 0.040676594, 0.10400211, 0.026499726, -0.100129865, 0.012024702, 0.13864364, 0.08457329, -0.02822368, -0.022809839, -0.032735273, 0.05287141, 0.031021947, 0.050165605) * inp_3_2_1; result2 += M4(-0.5396394, 0.59668887, -0.14847545, -0.11262023, -0.01689237, -0.05919283, -0.009492103, -0.014342331, -0.019490363, -0.00014506304, 0.028579397, 0.00881577, -0.02950097, 0.020590046, -0.04593803, -0.06230642) * inp_3_0_2; result2 += M4(-0.13129808, -0.032387625, -0.11815697, -0.035777994, -0.031615224, 0.03932641, 0.033151463, -0.0030484225, 0.078017905, 0.038734343, 0.0287011, 0.08592658, -0.10403654, 0.19592181, 0.020191323, 0.038593575) * inp_3_1_2; result2 += M4(0.13677305, -0.027477767, -0.08110967, -0.13846825, 0.03510477, 0.06813107, -0.0028038565, 0.074799486, 0.048010178, -0.0062584206, -0.025137382, -0.03969312, 0.020384584, 0.020166244, -0.003604738, 0.0026067435) * inp_3_2_2; result3 += M4(-0.07958163, 0.226626, 0.010839079, -0.15146959, -0.46411514, -0.9746635, -0.16067171, 0.26086998, -0.008847108, 0.1492069, 0.08893505, -0.009487972, -0.06220731, 0.06247489, -0.061906796, 0.024931276) * inp_3_0_0; result3 += M4(0.1605299, -0.050733022, -0.009947459, 0.12511593, -0.070824474, -0.056747224, 0.05806642, 0.42568368, -0.07097865, -0.09931744, -0.037423436, 0.060617164, 0.0150114, 
-0.08423409, -0.05806379, 0.092213616) * inp_3_1_0; result3 += M4(-0.0023733436, -0.071667895, -0.026429838, 0.17737049, -0.2254883, -0.45036575, -0.02305606, 0.37234473, -0.024518099, -0.07007941, 0.09261846, 0.24610615, -0.008102975, 0.094132714, -0.04586088, 0.11520861) * inp_3_2_0; result3 += M4(0.04834176, -0.45825547, 0.15551898, -0.6823625, 0.077481635, -0.3181704, 0.1819974, 0.007107567, -0.07662808, -0.0008196648, -0.10875353, 0.0067090136, -0.04190695, 0.03827345, -0.14126761, -0.051619746) * inp_3_0_1; result3 += M4(0.37930316, -0.04407946, -0.01238782, 0.23310061, 0.40887487, -0.719703, 0.48665968, 0.07135413, 0.24413355, 0.18634869, 0.3390411, -0.035839304, -0.07502038, -0.67640567, 0.0016627117, -0.02085262) * inp_3_1_1; result3 += M4(0.035987593, 0.07719589, 0.096374325, -0.16219136, 0.32538247, -0.87211215, -0.07418743, 0.062578045, -0.16020074, -0.15909803, -0.05066982, -0.017460218, -0.04726886, 0.039209247, -0.014914555, -0.052938584) * inp_3_2_1; result3 += M4(-0.012261669, -0.32118237, -0.022893699, -0.45062086, 0.046327095, 0.058009263, -0.065376736, 0.0055734706, -0.008084718, -0.17385426, -0.010035008, -0.006593194, -0.004550724, 0.09920785, -0.049575035, -0.013862305) * inp_3_0_2; result3 += M4(0.17086881, 0.014676094, -0.014210215, -0.076593995, -0.017158492, 0.046289034, 0.07802933, 0.012850987, 0.036605753, -0.02279402, 0.10354422, -0.033762064, -0.029116895, 0.07669946, -0.054344196, -0.096501894) * inp_3_1_2; result3 += M4(-0.023615815, 0.07025445, -0.08641296, 0.023666015, -0.018913722, 0.12914129, -0.025764482, 0.023226101, -0.030333309, 0.09175346, -0.037754275, 0.013420556, -0.051498346, -0.06312313, 0.008825869, -0.030032167) * inp_3_2_2; const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2); imageStore(out_image, output_base + ivec2(0, 0), max(result0, V4(0.0))); imageStore(out_image, output_base + ivec2(1, 0), max(result1, V4(0.0))); imageStore(out_image, output_base + ivec2(0, 1), max(result2, V4(0.0))); 
imageStore(out_image, output_base + ivec2(1, 1), max(result3, V4(0.0))); } //!DESC ArtCNN C4F16 (Conv2D-4-ReLU) //!COMPUTE 24 32 12 16 //!HOOK LUMA //!BIND conv2d_3 //!SAVE conv2d_4 //!WIDTH LUMA.w 2.0 * //!HEIGHT LUMA.h 2.0 * //!COMPONENTS 4 //!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > * #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable #ifdef GL_EXT_shader_explicit_arithmetic_types_float16 # define V4 f16vec4 # define M4 f16mat4 # define F float16_t #else # define V4 vec4 # define M4 mat4 # define F float #endif const ivec2 ksize = ivec2(3, 3); const ivec2 offset = ksize / 2; const ivec2 wg_size = ivec2(gl_WorkGroupSize); const ivec2 isize = wg_size + ksize - 1; shared V4 inp[4][isize.y][isize.x]; void hook() { const uvec2 local_xy = gl_LocalInvocationID.xy; ivec2 base = ivec2(gl_WorkGroupID) * wg_size; for (uint y = local_xy.y; y < isize.y; y += wg_size.y) { for (uint x = local_xy.x; x < isize.x; x += wg_size.x) { const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2); inp[0][y][x] = V4(conv2d_3_mul * texelFetch(conv2d_3_raw, input_base + ivec2(0, 0), 0)); inp[1][y][x] = V4(conv2d_3_mul * texelFetch(conv2d_3_raw, input_base + ivec2(1, 0), 0)); inp[2][y][x] = V4(conv2d_3_mul * texelFetch(conv2d_3_raw, input_base + ivec2(0, 1), 0)); inp[3][y][x] = V4(conv2d_3_mul * texelFetch(conv2d_3_raw, input_base + ivec2(1, 1), 0)); } } barrier(); V4 result0 = V4(-0.0040135784, 0.0032267163, 0.0077154385, -0.0131194005); V4 result1 = V4(0.00494108, 0.012729458, -0.007542627, 0.009159574); V4 result2 = V4(-0.0029531752, -0.08500148, -0.007686109, -0.0058430806); V4 result3 = V4(-0.0036268765, 0.012139913, -0.005200553, -0.008799173); const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0]; const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1]; const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2]; const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0]; const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1]; 
const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2]; const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0]; const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1]; const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.022446819, -0.09229735, -0.0010310403, -0.02886567, -0.00047958214, 0.018106725, 0.03281179, -0.03424525, -0.024050377, 0.093056306, 0.062247727, -0.09075345, 0.017732942, 0.20980102, 0.055907458, -0.032751247) * inp_0_0_0; result0 += M4(-0.021193886, 0.031856388, -0.030064065, 0.08287286, 0.006676548, 0.1282676, 0.045911342, -0.060882766, 0.031217102, 0.027770746, -0.0755595, 0.1300494, -0.01657229, -0.04097931, -0.043443713, 0.02746677) * inp_0_1_0; result0 += M4(0.020963114, -0.018738257, -0.0023086926, 0.03609221, -0.005606688, 0.0063512777, 0.0068921973, 0.002232806, -0.014810783, 0.011125607, 0.0073286225, 0.022102851, 0.0022564398, 0.016772412, 0.022617707, -0.012849541) * inp_0_2_0; result0 += M4(-0.01166327, 0.00764596, -0.1432118, 0.11700769, 0.007080215, -0.20993969, -0.09295115, 0.048322495, 0.0017307845, -0.38891366, -0.037892267, 0.037722066, 0.068878174, 0.41997644, -0.039344516, -0.020811452) * inp_0_0_1; result0 += M4(-0.00054955384, 0.014067522, 0.29207587, -0.16043621, -0.01351949, -0.08538314, -0.18636976, 0.19134319, 0.05039821, -0.093454875, 0.23129071, -0.29693934, -0.013584321, 0.08959259, -0.2683566, 0.21759026) * inp_0_1_1; result0 += M4(0.012457035, 0.018976199, -0.10622637, 0.08818159, 0.011355054, 0.005865674, -0.018169943, -0.0063354126, -0.020067457, -0.032704737, -0.04071966, -0.021879561, 0.0071899095, 0.018855078, 0.12129616, -0.098034054) * inp_0_2_1; result0 += M4(0.0065247775, -0.08215552, -0.012008458, 0.01703386, -0.09267752, -0.05647333, 0.020818327, -0.028593764, -0.014271322, -0.03788656, -0.020144267, 0.057078477, -0.008407014, 0.26333922, 0.019777702, -0.039687045) * inp_0_0_2; result0 += M4(-0.064684056, 0.0026886077, -0.076331764, 0.09625828, 0.1110674, 
-0.045051012, 0.09687265, -0.05024201, -0.11028242, -0.07762572, -0.07028817, 0.036931906, 0.006109739, -0.08666, 0.017680138, 0.013017057) * inp_0_1_2; result0 += M4(-0.00059720327, -0.03823437, 0.018477794, -0.052589536, -0.07530563, 0.043434873, -0.010242496, -0.009225539, 0.009707166, 0.0665768, -0.0029107493, 0.025805281, -0.017479628, 0.037734512, 0.028485654, -0.06030078) * inp_0_2_2; result1 += M4(-0.0040805386, -0.04037355, -0.016298028, -0.10901159, 0.0075836866, 0.068849795, -0.0015867921, 0.08682354, -0.023941765, 0.09033158, 0.005955157, 0.07716451, -0.00027787118, -0.0027789993, 0.02263391, 0.03503209) * inp_0_0_0; result1 += M4(-0.07058509, -0.08898672, -0.010795215, -0.24106635, 0.060452018, 0.027231239, 0.010219462, 0.12779248, -0.018958434, 0.009836416, -0.0048490902, -0.04708482, -0.0052001905, 0.0869615, -0.0153578855, 0.21318588) * inp_0_1_0; result1 += M4(0.0018951882, 0.029790182, -0.018424176, 0.093001716, 0.021929987, 0.033413723, -0.0014516768, 0.028860403, -0.029973764, -0.038182214, -0.014042475, -0.113934435, -0.05061862, 0.018797146, 0.0062874416, 0.00840364) * inp_0_2_0; result1 += M4(0.09108281, 0.12842284, -0.018321656, 0.101437524, 0.027001908, -0.1170763, -0.02120495, 0.011102345, 0.01890289, 0.050531536, -0.0261549, 0.11677346, -0.09126036, 0.018427776, 0.019214539, -0.12438176) * inp_0_0_1; result1 += M4(0.015747905, 0.089796506, 0.070704944, 0.14218248, -0.25985935, 0.06523569, -0.074167654, -0.2468187, 0.052781288, 0.16311482, 0.011219645, 0.23202926, 0.29769585, -0.14658746, -0.011744472, -0.1010643) * inp_0_1_1; result1 += M4(0.019483117, -0.081753485, -0.04673089, 0.004953991, -0.01471788, -0.057692084, -0.014760175, 0.004616836, 0.0422738, 0.07234505, 0.016358959, 0.10570093, -0.10613088, 0.0013665945, 0.056043588, 0.033560395) * inp_0_2_1; result1 += M4(-0.032377638, -0.036992475, -0.11856061, -0.06043218, -6.167084e-05, 0.071027756, -0.0020108805, 0.18424764, -0.019465916, 0.02570314, -0.063401304, 0.005627003, 
0.013067235, -0.08386608, 0.046325788, -0.07629352) * inp_0_0_2; result1 += M4(-0.03403502, -0.21124011, 0.128395, -0.118983746, 0.11138527, 0.18360862, 0.15476844, 0.20760833, -0.069354326, -0.32101798, -0.17294091, 0.0053785504, -0.007118223, 0.13378446, -0.22533989, -0.1316874) * inp_0_1_2; result1 += M4(-0.0032518269, 0.13455972, -0.047548614, 0.006280591, 0.014651311, 0.18860312, 0.012555325, 0.32387522, -0.00876885, 0.06728457, -0.03561065, -0.008271451, -0.027320381, -0.08463112, 0.123492375, -0.17134361) * inp_0_2_2; result2 += M4(0.04966542, -0.014303865, 0.009559554, 0.050956815, 0.022178259, 0.051871855, 0.014207632, 0.019486817, -0.007377738, 0.063087024, 0.0013534816, 0.073808886, 0.097131714, -0.031721152, -0.0050633196, 0.0027983838) * inp_0_0_0; result2 += M4(-0.071889535, -0.07981853, 0.029291, 0.0133744525, 0.02921235, -0.11562819, -0.038736906, 0.040879827, -0.05101825, 0.020087246, 0.027933752, -0.07056633, 0.024828767, 0.020775083, -0.03545826, 0.029901035) * inp_0_1_0; result2 += M4(0.031110793, -0.106053896, -0.10874414, 0.06562912, 0.015990185, 0.07383382, 0.034378067, -0.043125585, 0.038731106, -0.021163106, -0.07850288, 0.048384033, -0.026847137, -0.09662953, -0.045504253, 0.08047158) * inp_0_2_0; result2 += M4(-0.10335108, -0.01401889, 0.0038016094, -0.11588568, 0.036600247, 0.037577584, -0.007868014, -0.09576454, -0.018236142, -0.02697432, -0.025315316, 0.06763838, 0.17294714, 0.0654633, -0.02221289, 0.010515907) * inp_0_0_1; result2 += M4(0.031055294, 0.0437143, -0.030369015, 0.25869074, -0.080643415, 0.05039154, 0.061441638, -0.14613687, -0.06329736, -0.04495888, -0.037863605, -0.06050588, -0.037237942, 0.04440609, 0.15861167, 0.059269182) * inp_0_1_1; result2 += M4(0.0361052, -0.017285489, -0.15113652, 0.12752944, 0.001336428, -0.052559424, -0.19642851, 0.095575675, 0.065766186, -0.020980414, -0.1751973, -0.027264522, -0.16278692, -0.07947443, 0.041906893, -0.1923021) * inp_0_2_1; result2 += M4(0.0058025694, -0.10522837, -0.003142448, 
0.029614868, -0.12325673, -0.10741578, 0.0011671841, 0.008194599, 0.02683286, 0.05202333, -0.0081239175, 0.054529086, -0.0066502932, -0.01873842, 0.00828223, -0.008685434) * inp_0_0_2; result2 += M4(0.011555687, 0.019120391, 0.0068035405, -0.033768255, -0.11139161, -0.07871454, 0.016408984, -0.08887099, -0.009325316, -0.014493788, -0.0042881635, -0.010129332, -0.028416067, -0.06053119, -0.028769666, -0.0074731703) * inp_0_1_2; result2 += M4(-0.13009775, -0.04216335, 0.03589062, -0.05918483, 0.0063349833, 0.075580046, -0.06637416, -0.037039958, -0.11341202, 0.011516089, 0.03915498, 0.015401149, 0.09263449, -0.043401238, 0.035506167, 0.07418732) * inp_0_2_2; result3 += M4(-0.055232104, -0.052181397, 0.09712439, 0.053068426, 0.008184627, 0.024033103, 0.0064768447, 0.01491977, -0.10160054, 0.056887053, -0.08783956, -0.15666912, 0.023039022, 0.10843832, -0.005499212, 0.07572511) * inp_0_0_0; result3 += M4(0.01975557, 0.14321215, -0.119914174, 0.083147004, -0.075017564, 0.060033094, 0.02153906, -0.097562775, 0.14652961, 0.14147766, 0.12143339, 0.009040502, 0.029516807, 0.2184188, 0.13927828, 0.024003804) * inp_0_1_0; result3 += M4(0.04161908, -0.12291186, 0.014007055, -0.0044366475, 0.065806784, 0.021362163, -0.03280152, 0.009637781, -0.0008115923, -0.11553343, -0.20894948, -0.039682742, -0.075024076, 0.094884165, 0.006231465, -0.021898488) * inp_0_2_0; result3 += M4(0.07448278, -0.08064785, 0.1783025, 0.34271577, 0.028346969, -0.16897051, 0.031269226, -0.20038049, 0.032590013, -0.21115056, 0.0056216186, 0.04599637, -0.15385234, 0.055903055, 0.14891788, -0.08639215) * inp_0_0_1; result3 += M4(0.33081916, 0.18709011, 0.10239755, 0.14202496, 0.074805714, 0.094217606, 0.078011304, 0.13345073, 0.18732202, -0.10334045, -0.2688966, 0.016904162, -0.104053885, -0.25665408, 0.5058878, -0.12103951) * inp_0_1_1; result3 += M4(-0.080311626, 0.0096476115, -0.018007856, 0.0033822898, -0.040847033, 0.00077008334, -0.06762089, -0.019284576, -0.046603885, 0.05875434, 0.020478599, 
-0.015789887, 0.15411441, 0.20997126, -0.055525854, 0.036925472) * inp_0_2_1; result3 += M4(0.036129907, -0.21834287, 0.046575624, 0.09891928, -0.049099892, 0.0012149358, -0.0941673, 0.06713417, 0.005362152, -0.008132467, 0.029246638, -0.025881121, -0.021541959, 0.1865162, -0.056459565, 0.15184881) * inp_0_0_2; result3 += M4(-0.099397436, 0.11888237, 0.02868581, 0.023543233, 0.08439208, -0.22925763, -0.072643846, -0.19373773, -0.03718246, -0.043030668, -0.0279274, -0.02950604, 0.014288387, 0.12906073, 0.027930567, -0.028679885) * inp_0_1_2; result3 += M4(0.035180163, 0.0027005104, 0.0058912137, 0.0039001005, -0.013516835, -0.063876055, 0.068691954, 0.050706748, 0.02708764, 0.11582471, -0.04711184, -0.0068420544, -0.05498892, 0.094348386, -0.051397104, 0.0029429044) * inp_0_2_2; const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0]; const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1]; const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2]; const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0]; const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1]; const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2]; const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0]; const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1]; const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.0128763085, 0.052068323, -0.011954824, 0.0021608714, 0.00030223568, 0.017587025, -0.01058941, 0.008313421, -0.01251023, 0.29738817, -0.17888871, 0.17669652, 0.02183395, 0.11471337, 0.012121865, -0.056569528) * inp_1_0_0; result0 += M4(0.013480782, -0.30869487, 0.12514831, -0.056587443, -0.06087438, -0.043470133, -0.12944093, 0.1333073, -0.033362042, 0.16611177, 0.26333383, -0.3493265, -0.012774537, 0.01958674, -0.025693916, -0.00576545) * inp_1_1_0; result0 += M4(-0.0045791795, 0.057620887, 0.031712543, -0.021927495, 0.034111075, 0.04401428, -0.00932385, 0.02523834, -0.05177421, -0.037475258, -0.00818403, 0.18527389, 0.0073408224, 
0.013466603, -0.018797964, -0.008819587) * inp_1_2_0; result0 += M4(0.03511881, -0.31476012, -0.028296899, 0.014868918, -0.053283457, -0.11248065, -0.05588429, 0.052380826, 0.0022158825, -0.20020013, -0.013065737, 0.0059395954, -0.026061038, 0.009230161, 0.041668627, -0.04549138) * inp_1_0_1; result0 += M4(-0.03102469, 0.048471652, -0.18547377, 0.11187585, -0.03838462, 0.062065408, -0.25089842, 0.27000895, 0.15203376, 0.1822979, 0.024691727, -0.05018225, 0.11352361, -0.13844427, -0.08078384, 0.16746117) * inp_1_1_1; result0 += M4(0.05993513, 0.0053647747, -0.051664807, -0.009675976, -0.034741007, -0.016219925, -0.06747187, 0.08026824, 0.13252358, 0.2971501, 0.024876906, 0.021718679, -0.031329714, -0.0050613126, 0.085378274, -0.09904521) * inp_1_2_1; result0 += M4(-0.057906702, -0.11236472, 0.023313101, -0.02293448, -0.03768937, -0.2251109, -0.016215017, -0.00950604, 0.068863146, 0.30394804, -0.012602742, 0.013618551, -0.014835617, 0.28404757, -0.0015931885, -0.02047274) * inp_1_0_2; result0 += M4(0.10332839, 0.14611422, 0.05431722, -0.060723606, 0.09322961, 0.15063441, 0.03119704, 0.022710817, 0.01185925, 0.044084266, 0.05836641, -0.04914897, -0.11924304, -0.10219539, -0.023038983, -0.003801836) * inp_1_1_2; result0 += M4(-0.08029915, -0.046139803, 0.008697404, -0.01117896, -0.043575782, -0.09271846, -0.0020342334, -0.014879644, -0.06671466, -0.02011031, 0.023232145, -0.06101047, 0.0013811433, 0.077171504, 0.034526013, -0.060139507) * inp_1_2_2; result1 += M4(-0.019680703, -0.006418717, -0.004702196, 0.21885663, -0.019759674, 0.0102063, -0.002012741, 0.02891873, 0.08375299, 0.04083115, -0.008229031, -0.070820324, -0.033556238, -0.009796709, 0.0035726011, 0.120267294) * inp_1_0_0; result1 += M4(0.22270824, -0.05971591, 0.014297061, 0.16603453, -0.17384386, -0.082323544, -0.012659822, -0.26011658, -0.39758298, -0.27216193, -0.0003004569, 0.14809889, -0.017855754, -0.12187555, -0.0056101284, -0.36598822) * inp_1_1_0; result1 += M4(-0.0053077796, 0.07974863, 
0.0044853278, 0.17476735, -0.05587634, 0.000478736, 4.7219305e-06, 0.013212377, 0.17053604, -0.061615106, 0.034866598, -0.1701907, 0.037163932, 0.0093364, -0.0059611164, 0.031372953) * inp_1_2_0; result1 += M4(-0.005903216, -0.047463845, -0.0034881225, 0.020824652, -0.009203731, 0.053911712, 0.04947658, 0.004140391, 0.102251, -0.1762965, -0.111852996, 0.0038294205, -0.062487774, -0.07207221, 0.050615903, -0.019346781) * inp_1_0_1; result1 += M4(-0.22630687, 0.045354478, -0.052379932, -0.3705097, -0.29889983, -0.077089064, 0.19144304, -0.035254218, -0.09279178, -0.41239044, 0.23899618, -0.3868468, 0.18396308, 0.12465744, 0.020112954, 0.074541666) * inp_1_1_1; result1 += M4(-0.016391076, 0.17111103, 0.005496715, 0.053091466, -0.07366343, -0.12970647, 0.030588876, -0.010529222, 0.014089911, -0.008137579, -0.07495139, 0.11865472, -0.1468576, -0.019079275, -0.032410286, -0.07259862) * inp_1_2_1; result1 += M4(0.0151464455, 0.027335307, -0.028206328, -0.029740231, -0.0077183773, -0.017693141, -0.009017493, -0.04187936, 0.0021898046, 0.016073616, -0.053040896, 0.087421216, 0.011279699, 0.036217406, 0.06301923, -0.012236634) * inp_1_0_2; result1 += M4(0.041095432, 0.2537466, 0.17225754, 0.11324849, 0.026644496, -0.08952341, 0.2803843, 0.030880537, 0.03209173, 0.28756225, 0.042938236, 0.14979622, -0.036367867, -0.30107048, -0.115396775, -0.23922656) * inp_1_1_2; result1 += M4(0.008334098, 0.1433527, -0.014165843, 0.028277045, 0.013517422, -0.012090402, 0.028212752, 0.016341574, -0.019476518, 0.08225366, 0.02954602, 0.056807607, -0.0013000853, -0.10133523, 0.10570841, 0.0040202886) * inp_1_2_2; result2 += M4(0.00020585542, -0.10595579, -0.024075527, -0.043914355, 0.035975162, -0.110578425, 0.0005285829, 0.048812922, -0.122295074, 0.024852939, 0.00926203, -0.104586475, 0.006746198, 0.026529612, -0.005823842, -0.032530647) * inp_1_0_0; result2 += M4(0.12146054, -0.02784996, -0.014604895, 0.15886171, -0.1691908, -0.10535379, -0.03362664, -0.24715196, -0.011940299, -0.028050655, 
-0.08629228, 0.08535456, 0.08652907, -0.0057366705, -0.013718489, -3.2006486e-05) * inp_1_1_0; result2 += M4(-6.391767e-05, -0.03701326, 0.18519332, 0.004163531, -0.008389772, -0.04877323, -0.3238021, 0.11442726, 0.42126325, 0.07359092, -0.08448461, 0.2498309, 0.0030401861, 0.057449795, 0.0109812785, 0.04928388) * inp_1_2_0; result2 += M4(-0.00012884365, -0.00072631775, -0.0041053053, 0.013112391, -0.09921218, -0.067468196, -0.014271074, -0.12439954, -0.035519533, -0.08444414, -0.017164286, 0.0024305168, 0.06430278, -0.008736951, 0.030803327, 0.08006712) * inp_1_0_1; result2 += M4(0.12315418, -0.088640936, -0.050914943, -0.044375017, 0.13423163, -0.08564678, -0.102095746, 0.18636574, 0.15668021, 0.04321122, 0.057674617, 0.17986007, -0.017220415, -0.059452653, 0.047418516, -0.25613683) * inp_1_1_1; result2 += M4(-0.065463364, -0.09421511, 0.16259682, 0.037939362, -0.1232132, -0.111910224, 0.22589701, -0.031586938, -0.15750502, 0.03500383, -0.02621489, -0.22954142, -0.13212022, 0.0063589057, -0.008052567, -0.092233874) * inp_1_2_1; result2 += M4(-0.052525025, -0.03922068, 0.003195036, 0.025051178, -0.03326035, -0.08989328, -0.006789344, 0.0030992923, 0.034772567, -0.091194555, -0.009219043, 0.0696772, -0.047364756, 0.055251293, -0.006822553, -0.12333176) * inp_1_0_2; result2 += M4(-0.017599246, -0.038083553, 0.023908038, -0.09640398, -0.013075659, -0.04444389, -0.006169872, 0.0292883, -0.046958275, -0.048682574, 0.012830524, -0.17958483, -0.024997052, 0.028825838, -0.03318397, -0.0043117087) * inp_1_1_2; result2 += M4(0.024646105, -0.10364469, -0.021210829, 0.028682416, 0.0731311, 0.02400633, 0.0031529882, -0.012837452, -0.13710135, -0.018319733, -0.015243259, -0.031686984, 0.059482705, 0.042210914, -0.09135384, -0.095729634) * inp_1_2_2; result3 += M4(0.0034101333, 0.05535736, 0.024105577, 0.021848775, -0.018094912, -0.054792136, 0.035593905, -0.107914925, 0.079538144, -0.15841089, 0.28956112, 0.2037883, -0.026085768, 0.19641061, -0.17448546, 0.02818008) * 
inp_1_0_0; result3 += M4(0.013099687, -0.25685248, -0.21123523, -0.058896057, 0.13408852, -0.06408254, -0.038902212, 0.148966, -0.36242187, 0.9573575, -0.38538298, 0.20866586, -0.0135457115, -0.17544252, 0.21034725, -0.01854412) * inp_1_1_0; result3 += M4(0.015325741, 0.109892145, -0.029220585, 0.0414602, -0.08171769, 0.0672044, -0.09715838, -0.024387505, -0.110984795, 0.87591684, 0.36287174, 0.02873156, -0.045318272, 0.03970228, 0.044567477, 0.019652335) * inp_1_2_0; result3 += M4(0.048121918, 0.08487519, -0.0110788485, -0.10335771, 0.053024076, -0.019237788, 0.115838595, 0.27692616, -0.038947303, -0.07734193, 0.08789802, -0.13361062, -0.084543034, -0.063710235, -0.039903134, -0.082545) * inp_1_0_1; result3 += M4(-0.111958675, -0.12849922, -0.042949416, -0.007649164, -0.09928454, -0.22055286, -0.1844109, -0.15226477, 0.06964548, 0.29758227, 0.2469849, 0.07742351, -0.094236195, 0.034951027, 0.13522738, 0.0133545175) * inp_1_1_1; result3 += M4(-0.050296642, -0.28410354, 0.015063976, -0.006128918, -0.0470889, 0.008160088, 0.0020720777, 0.039283343, 0.10286344, 0.28448346, 0.02644191, 0.0026204113, 0.01836578, 0.084340185, -0.08349685, 0.013048368) * inp_1_2_1; result3 += M4(-0.010066227, 0.015333275, 0.040386546, 0.0027275602, -0.048058737, -0.13380641, 0.047336664, -0.03337915, 0.0035930146, -0.06406163, 0.033675723, 0.12354965, 0.06876556, 0.020875918, 0.018492095, -0.09037671) * inp_1_0_2; result3 += M4(-0.011871062, -0.002620799, 0.004697188, 0.011113553, 0.0071302364, 0.16026612, -0.037049957, 0.044946592, 0.015945464, 0.122067764, -0.0033488947, 0.018354658, -0.022900213, 0.02081817, 0.024235805, 0.041325647) * inp_1_1_2; result3 += M4(0.00051608094, -0.111439236, 0.049050186, -0.0017189587, -0.012687636, -0.18043011, 0.032639593, -0.011066172, 0.003809168, 0.10372078, -0.00017059076, -0.0061963093, 0.010642897, 0.21251322, 0.01499975, -0.00060482975) * inp_1_2_2; const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0]; const V4 inp_2_1_0 = 
inp[2][local_xy.y + 0][local_xy.x + 1]; const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2]; const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0]; const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1]; const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2]; const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0]; const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1]; const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.018767586, 0.37946993, 0.03240346, -0.074103445, 0.013256744, -0.73405415, 0.025663486, 0.0037305616, 0.014623672, 0.007632366, 0.013873153, -0.015707593, -0.0046343533, -0.20099154, -0.038023286, 0.022481618) * inp_2_0_0; result0 += M4(0.037362464, -0.029853325, 0.11825507, -0.1266219, -0.035908747, -0.027179906, -0.09394578, -0.107248306, 0.000541979, -0.061200544, -0.024837013, 0.0053975997, 0.017814586, -0.05382926, 0.0922865, -0.15499015) * inp_2_1_0; result0 += M4(-0.026803698, -0.03689163, 0.024773858, -0.024776613, 0.023835085, 0.103969544, -0.01580862, -0.014334969, -0.002205302, -0.0065507595, 0.058469918, -0.05255904, -0.016154546, -0.026679695, -0.0022297816, -0.014502392) * inp_2_2_0; result0 += M4(0.028078122, 0.2794735, 0.02043743, -0.013269822, -0.0069112084, -0.2905295, 0.0067768595, 0.0008341885, -0.05023395, 0.10181315, 0.07705284, -0.054124117, 0.06520775, -0.04132514, -0.11068868, 0.052190818) * inp_2_0_1; result0 += M4(-0.10037066, -0.15912087, 0.055432197, 0.3000064, -0.014009785, -0.20990136, -0.09185901, -0.18783937, 0.011323513, 0.06145226, 0.26730964, -0.38835666, 0.08074848, 0.11648274, 0.107400686, 0.13202316) * inp_2_1_1; result0 += M4(0.054533374, -0.030651595, 0.064748436, -0.11521519, -0.082970746, -0.16089259, -0.07056045, -0.0122828195, -0.008300171, 0.030350173, 0.2764626, -0.24443032, 0.027398918, -0.060851287, -0.050053008, 0.06686099) * inp_2_2_1; result0 += M4(-0.030806756, 0.28152052, 0.016293688, 0.00019218923, 0.06385456, -0.768716, -0.008862266, 
-0.0013578102, 0.09297159, 0.17276277, 0.015002878, -0.011491623, 0.039086744, 0.12742285, -0.037600823, -0.028224783) * inp_2_0_2; result0 += M4(-0.113680325, -0.090142876, 0.009299604, -0.042315293, -0.23041852, 0.10187753, -0.02534172, 0.015198311, -0.19454521, -0.03534942, 0.09299287, -0.05797667, 0.35324952, -0.034904387, 0.06753692, -0.06613343) * inp_2_1_2; result0 += M4(-0.0041130786, 0.07705423, 0.029097408, -0.021241808, -0.14221656, 0.071248285, 0.0027881418, 0.02272239, 0.013451467, 0.0041271113, 0.04043472, -0.049031157, 0.05890192, 0.00071542076, -0.030902434, -0.01806016) * inp_2_2_2; result1 += M4(-0.0100553315, 0.1445101, 0.025206093, 0.16367912, -0.0077398536, 0.057262916, -0.0018721124, 0.009491158, -0.019690538, -0.00050403655, -0.0022982846, 0.0064275805, 0.03182664, 0.08586602, 0.005296856, 0.11086229) * inp_2_0_0; result1 += M4(0.100263774, 0.032224674, 0.007614719, 0.3077328, -0.11689845, 0.045522682, 0.005368042, -0.6640451, 0.05671755, 0.06947968, 0.0059584933, 0.12661453, 0.114836976, 0.0078067435, 0.012673742, 0.14992271) * inp_2_1_0; result1 += M4(-0.014965994, -0.03613807, 0.017843746, 0.057888564, 0.0016416174, 0.039073598, 0.004485906, 0.01314407, -0.07619885, -0.013138787, 0.01482138, -0.014854271, 0.02104871, 0.058661107, 0.00537317, 0.059871946) * inp_2_2_0; result1 += M4(-0.019947665, -0.02337146, -0.05745076, 0.06390522, 0.0061994577, -0.03513352, 0.0039894506, -0.1870658, 0.07621767, -0.041756485, 0.0042083175, -0.09015903, 0.050684888, -0.032862004, -0.0383148, -0.06806279) * inp_2_0_1; result1 += M4(0.11575761, -0.03566747, -0.1419168, 0.07686426, -0.11935506, -0.5449443, -0.019731725, -0.4439198, 0.15448299, 0.06354772, -0.050596375, -0.17802976, -0.039333005, -0.1836554, -0.10640459, -0.024331508) * inp_2_1_1; result1 += M4(-0.06137397, -0.028483927, -0.01319596, 0.029112315, 0.02215432, -0.47851038, -0.013122113, -0.37099057, -0.064374104, 0.12437201, 0.09441625, 0.0964911, 0.061800938, -0.020516852, -0.024200046, 
-0.080888055) * inp_2_2_1; result1 += M4(-0.02180753, 0.2914682, 0.07562381, -0.046386164, 0.010121207, 0.020429429, 0.013896352, 0.010339173, -0.008270475, -0.0005158645, -0.0496025, 0.018030144, -0.0029540863, 0.053199697, -0.12489866, 0.07791219) * inp_2_0_2; result1 += M4(0.03735715, 0.30508387, 0.15844251, -0.1175811, -0.03410126, -0.7469112, -0.13593495, 0.020091493, 0.0056048105, -0.1562087, -0.20568803, 0.007995341, 0.045260154, 0.37609518, 0.27776894, 0.14643498) * inp_2_1_2; result1 += M4(-0.010797698, 0.18568471, 0.05050696, -0.03671986, -0.01899938, -0.3269143, -0.05808486, -0.057467256, -0.01960092, -0.17659442, 0.12925349, -0.14431082, -0.018930312, 0.18480995, -0.07412044, 0.15717995) * inp_2_2_2; result2 += M4(-0.046204865, -0.103821956, -0.010860188, 0.037352648, 0.045176484, -0.11509887, 0.0032391804, 0.05722828, -0.014318247, -0.020388361, -0.006871636, 0.012856181, 0.0663493, -0.014491342, -0.00056829635, -0.018839898) * inp_2_0_0; result2 += M4(0.015519363, -0.04379356, -0.048258897, 0.0092985695, -0.12677082, -0.0840051, -0.03845116, -0.44977376, 0.024705162, -0.08450324, -0.007273154, 0.003961096, 0.1205917, -0.06583735, 0.09004664, 0.056332048) * inp_2_1_0; result2 += M4(-0.045568097, -0.03407655, 0.13666867, -3.1876676e-05, 0.019434048, -0.0877716, -0.32850698, -0.2531008, -0.06927688, -0.05049396, 0.07952079, 0.016784674, 0.037036795, 0.06620619, 0.044526313, -0.14102459) * inp_2_2_0; result2 += M4(0.10563744, -0.054532535, -0.02444866, -0.04438278, 0.0056826673, -0.10505247, -0.0025805442, -0.011387135, -0.0163086, -0.0750018, 0.0057180603, 0.017566675, 0.020842876, -0.069835514, -0.025404071, -0.06463173) * inp_2_0_1; result2 += M4(-0.09687428, -0.013830805, -0.0043008714, 0.19830416, -0.3254863, 0.07027251, 0.090760596, -0.42412746, -0.026216919, -0.042083472, 0.08191527, 0.08890029, 0.32130423, -0.10276524, -0.046229154, -0.09222065) * inp_2_1_1; result2 += M4(-0.028740982, 0.05833635, -0.074025534, -0.11106386, -0.12051663, 
-0.016884184, -0.8760091, -0.21653624, -0.023070619, -0.086227246, -0.3356709, -0.10698863, 0.19418916, -0.06019335, 0.55395144, 0.04142959) * inp_2_2_1; result2 += M4(-0.10068316, -0.055732206, 0.0039255223, 0.03194576, 0.0169547, -0.11442009, 0.0066540116, -0.06813424, 0.098051235, -0.104861155, 0.00054658856, 0.035421174, 0.078026086, -0.013412237, -0.0028990135, 0.050640628) * inp_2_0_2; result2 += M4(-0.11782882, -0.123334214, -0.0017048669, -0.09074227, -0.012847767, -0.104540326, -0.024515854, 0.017064922, -0.12106071, 0.01742032, 0.024233157, -0.12282968, 0.1479409, 0.010675117, 0.03142114, -0.08293813) * inp_2_1_2; result2 += M4(0.08341529, -0.093824685, -0.0050690966, 0.016659621, -0.1323844, -0.08899898, 0.011605324, 0.009853149, 0.1091089, -0.0008812067, 0.03615029, -0.014381648, 0.035703827, -0.0058702324, -0.01723073, -0.09445252) * inp_2_2_2; result3 += M4(-0.0033674666, 0.27479184, -0.14520442, 0.056195375, 0.021564325, 0.120622255, 0.0747343, -0.06224032, -0.0316305, 0.08231498, -0.07389498, 0.02856619, -0.045992043, -0.35338366, 0.029923819, 0.039241537) * inp_2_0_0; result3 += M4(-0.08923331, 0.017857159, 0.35180876, -0.07138678, 0.04999131, -0.57993644, -0.33715415, -0.06651225, 0.05477934, -0.14006998, 0.039397564, -0.043427624, -0.06581547, 0.23676425, 0.17026886, -0.17200765) * inp_2_1_0; result3 += M4(0.10055006, 0.033518847, -0.050471883, -0.021379095, -0.13827854, -0.2650917, -0.06516931, 0.02099619, -0.07549386, 0.044028837, -0.0010062923, 0.00819925, 0.020101171, -0.03921381, 0.023089815, 0.02456863) * inp_2_2_0; result3 += M4(0.03165244, -0.07129725, -0.23964712, -0.23844421, -0.037724618, -0.17837454, 0.016393203, -0.25098523, 0.06200857, 0.086369134, -0.027192384, -0.18964183, 0.039954573, -0.17039742, 0.13961609, 0.23892525) * inp_2_0_1; result3 += M4(0.071400404, -0.015791003, 0.18592079, -0.10519635, -0.27906966, -0.4978389, -0.18447538, -0.12677093, -0.3329094, -0.0574575, 0.0017264464, -0.07430323, -0.031537917, 0.42141986, 
-0.11500433, -0.09284232) * inp_2_1_1; result3 += M4(-0.01741798, 0.052760378, -0.06376725, -0.053840555, 0.12993482, -0.33029583, -0.043332346, -0.0067290626, 0.20229244, 0.1036244, -0.05800761, -0.013660656, -0.09456258, -0.32976916, -0.068030186, -0.0017372206) * inp_2_2_1; result3 += M4(0.061151385, 0.28756905, -0.1282007, 0.072775304, 0.015067437, 0.06658751, -0.0007233735, 0.049608916, -0.025705693, 0.09252448, -0.081732415, -0.00092824135, -0.10873655, -0.07075192, 0.013692618, -0.10097195) * inp_2_0_2; result3 += M4(0.019418621, -0.14067507, 0.12894523, -0.08513426, -0.017363464, 0.044414137, 0.06420442, 0.0006786324, 0.1281164, 0.056295928, -0.007402897, -0.017317312, -0.006835481, 0.11457798, -0.0066917334, -0.13481094) * inp_2_1_2; result3 += M4(0.0070297364, 0.19065014, -0.017244624, 0.014097578, 0.02551897, 0.013782661, -0.00926291, -0.00593344, -0.004784505, 0.16583267, -0.021029223, -0.020769436, -0.035404734, -0.09736763, 0.00019690121, -0.01409137) * inp_2_2_2; const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0]; const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1]; const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2]; const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0]; const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1]; const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2]; const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0]; const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1]; const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.02849564, 0.20801629, 0.07789714, -0.08591794, -0.013138032, -0.12500729, -0.05955897, -0.009347837, -0.0026199075, -0.18090871, 0.04996966, -0.024295611, 0.023241067, 0.2989913, -0.022329632, -0.01068407) * inp_3_0_0; result0 += M4(0.006488096, 0.16587855, -0.023761768, 0.008591953, 0.017688885, 0.03924619, 0.037046492, 0.017823525, -0.032084648, -0.04694791, -0.038628716, 0.06675431, -0.05275973, -0.27945307, -0.10931105, 0.0933604) * 
inp_3_1_0; result0 += M4(-0.0011106502, -0.09068102, -0.008446835, 0.0063396683, -0.0095890695, 0.080751486, 0.03700392, -0.010667634, 0.010742994, 0.020316003, -0.09261909, 0.07645715, 0.04386066, 0.20862317, 0.011739568, -0.019822497) * inp_3_2_0; result0 += M4(0.092012346, -0.99243253, 0.08192624, -0.01996185, -0.12249764, -0.23786958, 0.02590213, 0.061792508, 0.024931097, -0.19624819, 0.020123413, -0.015653277, -0.1624149, -0.09031144, 0.036892284, -0.07276322) * inp_3_0_1; result0 += M4(0.04910099, -0.05648109, 0.23602869, -0.32097897, -0.1457341, -0.16602312, -0.13416448, -0.12093756, 0.2184975, -0.059735287, -0.27835158, 0.3410014, 0.22202568, 0.010500871, 0.07437828, 0.13752706) * inp_3_1_1; result0 += M4(-0.005880409, 0.055422354, -0.022551373, -0.0059791715, 0.018647302, 0.029757852, 0.019998867, 0.049658902, -0.04080723, -0.08332848, -0.15499778, 0.14148471, -0.17509991, -0.040453184, 0.09497388, -0.07449588) * inp_3_2_1; result0 += M4(0.08568281, -0.29432014, -0.029305942, 0.053156, 0.06250727, 0.063061155, 0.0035598758, 0.003287929, -0.025394496, -0.06501153, 0.00040421658, 0.0016468758, -0.27703464, 0.046435226, 0.012366997, -0.08402375) * inp_3_0_2; result0 += M4(0.032805514, 0.037855648, 0.056718554, -0.031252827, -0.19035767, -0.008683348, 0.040429004, 0.024449537, -0.2014328, -0.021833923, -0.08468981, 0.04139073, -0.37713024, -0.023530802, -0.08192364, 0.059064213) * inp_3_1_2; result0 += M4(0.010253931, -0.09591305, -0.05260071, 0.07549732, -0.09035534, 0.055765323, 0.0043522757, -0.02160955, 0.06055933, 0.02745642, 0.0030234493, -0.013425673, 0.12574619, 0.080521055, 0.0094557395, -0.003933899) * inp_3_2_2; result1 += M4(-0.012831924, -0.0132940365, 0.014013292, -0.038607925, -0.04236358, -0.0040199724, 0.00038156085, -0.06882647, 0.0035105674, -0.05997285, -0.019546315, -0.0690018, -0.05382405, -0.18697755, -0.016889963, -0.5278594) * inp_3_0_0; result1 += M4(0.074903056, 0.030982196, -0.009691815, 0.021653235, -0.08748461, -0.12714823, 
-0.0073664947, -0.2103852, 0.27545306, -0.172489, -0.0138034215, -0.29320762, 0.01954728, -0.02743053, -0.023833975, -0.19926408) * inp_3_1_0; result1 += M4(0.013739132, -0.112030625, 0.002236266, -0.11029386, 0.08357734, 0.029980266, -0.034573138, 0.19402589, 0.10511627, 0.018066818, -0.022175178, -0.06294438, -0.053210363, 0.02821048, 0.019673957, -0.048769414) * inp_3_2_0; result1 += M4(-0.055812005, -0.124672994, -0.0028307915, 0.058708996, 0.0993413, 0.11292148, 0.0051302207, 0.09086054, -0.08655739, 0.018395796, 0.02877268, 0.05893856, -0.020206036, 0.2676144, 0.046343397, 0.2983221) * inp_3_0_1; result1 += M4(-0.34456164, 0.16105084, -0.014667792, -0.11443499, -0.15428352, 0.019309709, 0.098831594, -0.22864045, 0.2626861, 0.15095614, -0.23578995, 0.29990196, 0.18960963, 0.21112593, 0.093182735, 0.47255817) * inp_3_1_1; result1 += M4(0.059633996, 0.09617087, -0.044195272, 0.10701252, -0.0113297105, -0.19980581, -0.06557026, -0.35609877, 0.19168015, -0.17455539, -0.13791767, -0.080850326, -0.01642995, 0.2259445, 0.03632761, 0.34964672) * inp_3_2_1; result1 += M4(-0.021547923, -0.033423975, 0.12387771, -0.15235455, -0.064095765, -0.22966129, -0.053911634, 0.009297953, 0.016425744, -0.031667043, 0.050109, -0.041998934, -0.0255521, -0.6661454, 0.0064874776, -0.06525574) * inp_3_0_2; result1 += M4(-0.00955878, -0.18819161, 0.20119229, 0.067589976, 0.059636027, -0.21445525, -0.15586011, 0.12951705, -0.023852644, -0.17125781, -0.22089507, -0.15525785, 0.021554073, -0.24427617, -0.11932995, -0.11995491) * inp_3_1_2; result1 += M4(0.00027492762, -0.011149431, -0.04397851, -0.015187671, -0.018693645, 0.17604014, 0.036288437, 0.027685415, 0.02450683, 0.028729985, -0.18002404, -0.06767717, -0.018331988, -0.06327862, 0.06566395, -0.04259143) * inp_3_2_2; result2 += M4(0.020330925, -0.07583019, 0.00040865978, 0.17905723, -0.12488742, -0.038913324, -0.013407421, -0.15406337, -0.029491287, -0.019691758, 0.0037197322, -0.03494804, -0.048548967, -0.10085025, -0.011280026, 
-0.08775244) * inp_3_0_0; result2 += M4(0.009023494, -0.08019787, 0.042378664, -0.005744673, -0.14025515, 0.04256303, 0.001598648, 0.21287486, 0.19740039, 0.006873391, 0.02995404, -0.017945066, 0.082922794, 0.045110267, -0.021023361, -0.0863003) * inp_3_1_0; result2 += M4(0.012322307, -0.07433875, 0.27594987, -0.014473471, -0.085696355, 0.0740318, -0.10467701, -0.03484768, 0.04685408, -0.017835882, 0.16549787, -0.01544419, -0.15037893, -0.03528296, -0.059958216, -0.039337374) * inp_3_2_0; result2 += M4(0.13516413, -0.015549442, 0.04405686, -0.08380343, -0.112463795, -0.11393289, 0.010840805, 0.06926739, 0.06683571, -0.013422019, 0.009995389, 0.13865924, -0.11091083, -0.04586237, -0.024832733, -0.10614919) * inp_3_0_1; result2 += M4(-0.18775412, -0.05149138, 0.26367128, 0.06821067, -0.60855925, 0.024774756, 0.021855973, -0.29352945, -0.06843421, 0.054736953, 0.05677782, -0.15776412, -0.23566322, 0.024886563, 0.18402183, -0.35429534) * inp_3_1_1; result2 += M4(0.03283841, -0.07640909, -0.23255424, 0.010227406, -0.20778243, 0.00991061, -0.17909159, 0.12931508, 0.08077656, 0.046959363, -0.17226915, 0.052532163, 0.010613912, 0.061070718, -0.24095905, -0.18562591) * inp_3_2_1; result2 += M4(0.0634982, -0.08347916, 0.03740913, -0.0013856665, 0.045188613, 0.07850495, -0.0029027993, 0.034063198, -0.010978764, 0.02016892, -0.002155239, -0.061337594, -0.28043994, -0.0872294, -0.011892446, -0.35126853) * inp_3_0_2; result2 += M4(0.3203884, -0.024319857, 0.09506602, 0.023610925, -0.27575597, -0.042449426, -0.017148286, 0.14666425, -0.13386834, 0.02054439, -0.05575814, 0.1107654, -0.16863112, 0.022383632, 0.08456575, 0.14868464) * inp_3_1_2; result2 += M4(-0.016137088, 0.002761102, -0.0030788647, 0.09071825, -0.10555493, -0.11326085, 0.016684838, -0.06471668, -0.09259946, 0.0008187052, -0.074667886, 0.007401484, 0.16636859, 0.08404795, 0.1367621, 0.019132363) * inp_3_2_2; result3 += M4(-0.084024556, -0.034183636, -0.11003828, 0.07596261, 0.009530936, -0.123412155, 0.062994644, 
-0.0016971333, 0.030774757, 0.06060111, -0.112204134, -0.015265249, 0.06952364, -0.12871222, -0.1793224, -0.082263656) * inp_3_0_0; result3 += M4(0.035075944, -0.03648615, -0.03251158, -0.02360845, 0.09579168, -0.093637735, -0.12766589, 0.02200376, 0.05223472, -0.38457364, 0.2035677, -0.06779454, 0.075349145, 0.24952288, 0.2585974, -0.026620159) * inp_3_1_0; result3 += M4(0.05988551, 0.024185207, 0.053580564, 0.00680627, -0.010874452, -0.118844636, 0.077303134, 0.0002133361, -0.002078051, -0.13200852, -0.059146896, -0.016534718, -0.0701598, 0.3903104, -0.044406667, 0.06022056) * inp_3_2_0; result3 += M4(-0.0680266, 0.13831161, -0.099020086, 0.027603677, -0.012557745, -0.12612383, 0.009141904, 0.0027618592, -0.048944946, -0.04472969, -0.019392889, -0.050438944, -0.30377015, 0.09719155, -0.1927275, -0.21734476) * inp_3_0_1; result3 += M4(-0.0345394, -0.087790735, -0.5650414, 0.06738396, -0.08137511, -0.33920997, -0.13534264, 0.13543794, 0.13309768, -0.15364899, 0.15392002, 0.051194873, 0.24646005, -0.035910964, 0.35060343, -0.029892717) * inp_3_1_1; result3 += M4(-0.02596073, -0.3088059, -0.008306526, -0.0453417, 0.008620145, 0.13213503, -0.14677626, 0.0022890011, -0.1017914, -0.16327615, 0.15063281, 0.03946359, 0.028780328, 0.01931414, -0.08703548, 0.047522143) * inp_3_2_1; result3 += M4(-0.028443659, -0.04586124, -0.079354525, -0.22521858, -0.0010945717, 0.18372962, -0.16510531, -0.11550843, 0.060972877, -0.0015049014, 0.011682705, -0.014153036, -0.18183236, -0.27702382, -0.13596578, -0.065135516) * inp_3_0_2; result3 += M4(0.017811868, 0.15783854, -0.028239341, 0.059509262, 0.038688328, -0.36765572, 0.10929163, -0.010511875, -0.10294899, -0.1696231, 0.008989734, 0.07924746, -0.37400815, -0.12820886, -0.0072698053, -0.12196404) * inp_3_1_2; result3 += M4(-0.0034627488, -0.07924724, 0.05619683, -0.012788033, 0.018114138, -0.024447763, -0.07260673, 0.005397169, 0.029772269, -0.095115826, -0.011049113, -0.026298985, -0.016546406, 0.17970195, -0.023208177, 
-0.0030759405) * inp_3_2_2; const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2); imageStore(out_image, output_base + ivec2(0, 0), max(result0, V4(0.0))); imageStore(out_image, output_base + ivec2(1, 0), max(result1, V4(0.0))); imageStore(out_image, output_base + ivec2(0, 1), max(result2, V4(0.0))); imageStore(out_image, output_base + ivec2(1, 1), max(result3, V4(0.0))); } //!DESC ArtCNN C4F16 (Conv2D-5) //!COMPUTE 24 32 12 16 //!HOOK LUMA //!BIND conv2d_4 //!SAVE conv2d_5 //!WIDTH LUMA.w 2.0 * //!HEIGHT LUMA.h 2.0 * //!COMPONENTS 4 //!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > * #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable #ifdef GL_EXT_shader_explicit_arithmetic_types_float16 # define V4 f16vec4 # define M4 f16mat4 # define F float16_t #else # define V4 vec4 # define M4 mat4 # define F float #endif const ivec2 ksize = ivec2(3, 3); const ivec2 offset = ksize / 2; const ivec2 wg_size = ivec2(gl_WorkGroupSize); const ivec2 isize = wg_size + ksize - 1; shared V4 inp[4][isize.y][isize.x]; void hook() { const uvec2 local_xy = gl_LocalInvocationID.xy; ivec2 base = ivec2(gl_WorkGroupID) * wg_size; for (uint y = local_xy.y; y < isize.y; y += wg_size.y) { for (uint x = local_xy.x; x < isize.x; x += wg_size.x) { const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2); inp[0][y][x] = V4(conv2d_4_mul * texelFetch(conv2d_4_raw, input_base + ivec2(0, 0), 0)); inp[1][y][x] = V4(conv2d_4_mul * texelFetch(conv2d_4_raw, input_base + ivec2(1, 0), 0)); inp[2][y][x] = V4(conv2d_4_mul * texelFetch(conv2d_4_raw, input_base + ivec2(0, 1), 0)); inp[3][y][x] = V4(conv2d_4_mul * texelFetch(conv2d_4_raw, input_base + ivec2(1, 1), 0)); } } barrier(); V4 result0 = V4(-0.0018953535, 0.0058265817, 0.018822813, -0.013862387); V4 result1 = V4(-0.0010489298, 0.005604526, -0.00053814566, 0.0020017463); V4 result2 = V4(0.010203275, 0.011347302, 0.0063539655, -0.00053525064); V4 result3 = V4(-0.0053338343, 0.001321621, 0.036247738, 
-0.019181821); const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0]; const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1]; const V4 inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2]; const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0]; const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1]; const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2]; const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0]; const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1]; const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.19293816, -0.24982975, -0.21115892, 0.023415579, 0.009774828, 0.06236626, 0.08222207, -0.0629858, 0.0619333, 0.012681148, -0.14015609, 0.07153636, -0.13429844, -0.05804902, 0.18437627, -0.058576047) * inp_0_0_0; result0 += M4(-0.4484673, -0.96403027, -0.21804763, -0.33155116, 0.13409126, 0.11143466, 0.07317458, -0.034854446, 0.0052757114, -0.02699294, -0.0018612333, -0.010977338, -0.08321815, 0.047566846, 0.0144794155, 0.04763027) * inp_0_1_0; result0 += M4(0.5540873, -0.32753128, -0.6608504, -0.2889768, 0.119986236, 0.08951169, 0.1222837, -0.1704321, -0.028143425, -0.08819469, -0.033623528, 0.014351359, 0.008765508, 0.04452446, 0.020204451, -0.00047179594) * inp_0_2_0; result0 += M4(0.0037529117, 0.080815926, 0.057914045, 0.10689451, 0.043663103, 0.12956528, 0.10605515, -0.008775666, -0.063651286, -0.08790763, -0.0064682956, -0.045193557, 0.035341766, 0.037771374, -0.10269953, 0.018227393) * inp_0_0_1; result0 += M4(0.16825603, 0.12547831, 0.21248084, 0.2577808, 0.20802349, 0.13508378, 0.50583375, 0.16538978, 0.29226398, -0.73408896, 0.031909198, -0.14322773, -0.5266555, 0.68081826, -0.1012102, 0.02099461) * inp_0_1_1; result0 += M4(0.17190212, -0.01142557, -0.1571053, 0.0655869, -0.057913907, 0.36778232, -0.15601146, -0.0419103, -0.1550589, -0.123147294, 0.06394219, 0.092019394, 0.21199438, 0.15671359, -0.046308175, -0.124025375) * inp_0_2_1; result0 += M4(-0.0008404076, 0.004338585, 
0.0019855557, 0.00018114936, -0.020474406, 0.06057655, 0.030161498, -0.06145807, -0.04435459, -0.08627908, 0.10944488, 0.01887649, -0.03103268, 0.03310187, -0.17767999, -0.019043589) * inp_0_0_2; result0 += M4(0.12954727, -0.12922326, 0.2350974, 0.049719814, 0.13253595, 0.09160491, -0.21646306, -0.04050591, -0.036288727, -0.056381904, 0.0121351825, 0.04496311, -0.31566897, 0.11194864, -0.42841324, -0.016267655) * inp_0_1_2; result0 += M4(-0.024690468, 0.014333521, -0.06532126, -0.0041400986, -0.10427263, 0.18187162, 0.01258527, -0.019371599, 0.1249462, 0.024022829, -0.031402778, 0.02241293, -0.26506376, 0.08613873, -0.12220515, -0.0029115404) * inp_0_2_2; result1 += M4(0.31627145, 0.15249805, -0.08140558, 0.42994848, -0.09804992, -0.012594858, 0.0026646887, -0.0043445844, 0.043157805, 0.109746225, -0.026367161, 0.09464095, -0.015199039, -0.14187415, 0.12840094, -0.067172185) * inp_0_0_0; result1 += M4(-0.1638916, -0.36235115, -0.1975086, 1.575765, -0.1965545, -0.08610196, -0.10926018, -0.00033870476, 0.040758844, -0.0061201644, -0.08756401, 0.04615102, 0.035327163, 0.044442933, 0.10312399, 0.036930207) * inp_0_1_0; result1 += M4(-0.8492598, -0.33897942, 0.72526574, 0.4631478, -0.17841868, -0.1410183, -0.16832137, -0.013158105, 0.033784576, -0.025566028, 0.0039394298, 0.0092234295, -0.02678299, -0.0073904945, -0.015628401, 0.02390855) * inp_0_2_0; result1 += M4(0.08460976, -0.009893607, 0.061330132, 0.051070828, -0.16053094, 0.007680145, -0.0623621, -0.07080623, 0.09263616, -0.07130709, 0.094535634, -0.0007521465, -0.06824691, -0.017334202, -0.14649309, 0.05471789) * inp_0_0_1; result1 += M4(0.21807747, -0.008780514, -0.23558716, -0.07192246, -0.11119064, -0.045066413, -0.01870417, 0.16801721, 0.45134255, 0.14678022, 0.013131626, -0.12167587, -0.34481934, -0.20810209, 0.008862433, 0.14668722) * inp_0_1_1; result1 += M4(0.10040009, -0.23140623, 0.13991402, 0.13232943, -0.44653964, -0.22550936, 0.041132253, -0.030094247, 0.057539612, -0.09650221, -0.20644088, 
0.07337908, -0.0066268765, 0.061737567, 0.22057678, 0.13231337) * inp_0_2_1; result1 += M4(-0.0073576197, 0.019599516, 0.017028693, 0.00043496815, -0.098762505, -0.010775835, 0.034300815, -0.07583991, 0.042083025, 0.06400441, 0.08720754, -0.007852188, 0.0372069, -0.1406307, -0.076517396, -0.074295625) * inp_0_0_2; result1 += M4(0.0059161535, 0.0372593, 0.13355313, 0.22900341, -0.16262229, -0.15288241, -0.3086904, -0.120383196, 0.021071693, -0.13865116, -0.007022702, -0.07665101, -0.024432838, -0.033629846, -0.012343005, -0.23553605) * inp_0_1_2; result1 += M4(0.028445385, -0.009794425, -0.023792878, -0.075042315, -0.12081588, -0.055164486, 0.13519762, -0.108135365, -0.085314415, -0.031570014, 0.027543535, -0.044589665, 0.13610293, -0.018678987, -0.14013505, -0.15338679) * inp_0_2_2; result2 += M4(0.21252534, -0.11910489, -0.45723984, -0.06417371, -0.021582598, 0.02339392, -0.018546809, -0.012605804, 0.032364845, -0.035078682, 0.058367588, -0.0005620972, 0.051605877, 0.0215126, -0.16632713, 0.026713151) * inp_0_0_0; result2 += M4(-0.4662353, 0.36299333, -1.5159458, -0.50809747, -0.23438907, 0.030885411, 0.06186301, 0.06464797, 0.016437657, -0.0112345265, -0.114750154, -0.010771786, 0.00076314394, 0.031646077, 0.23297943, 0.08224971) * inp_0_1_0; result2 += M4(-0.5306165, 0.022781476, -0.6611916, -0.13589059, -0.29746577, -0.03359175, 0.1488752, 0.08763788, 0.0734923, 0.016807996, -0.014261707, -0.009930438, -0.052407317, -0.011143981, -0.0003923795, 0.0061985618) * inp_0_2_0; result2 += M4(0.07399199, 0.0057193898, 0.025029065, 0.037136503, -0.09694074, -0.015754705, 0.0858824, 0.02630069, 0.1436345, -0.024873883, -0.042757105, -0.0028142098, -0.13067973, 0.016069483, -0.07657447, 0.03459619) * inp_0_0_1; result2 += M4(-0.024490986, -0.056364402, -0.12789075, -0.2377805, -0.35516652, -0.11532965, 0.096392564, 0.3001136, 0.50962794, -0.09597639, -0.13577257, 0.02288129, -0.62172246, 0.2233473, -0.027296012, 0.3107101) * inp_0_1_1; result2 += M4(-0.1822321, 
-0.019152636, -0.03883703, 0.06758995, -0.32357153, 0.010696737, -0.20381221, -0.14015651, 0.04149892, 0.08667886, 0.052319348, -0.028778236, -0.08863007, -0.1016996, -0.026162509, 0.21760902) * inp_0_2_1; result2 += M4(-0.019655038, 0.01048052, 0.014871437, 0.0047376687, -0.05845112, -0.027396418, 0.0135491295, -0.01598734, 0.010417036, -0.027401034, 0.041378684, 0.10007943, -0.0045745275, -0.03977022, -0.06537003, -0.05988692) * inp_0_0_2; result2 += M4(0.051048204, 0.112982355, 0.030450165, -0.11833691, -0.22066031, -0.020368785, 0.07926755, -0.14934708, 0.1024632, 0.05717762, 0.07574149, 0.037296783, -0.108641684, -0.14161289, -0.18493624, -0.097702526) * inp_0_1_2; result2 += M4(0.022333367, -0.035479385, -0.06719012, 0.072104804, -0.13920522, -0.06867922, 0.1834858, 0.09901816, -0.10181411, -0.011209808, -0.03838759, -0.079485156, 0.1112582, -0.04442816, -0.03244046, 0.17494577) * inp_0_2_2; result3 += M4(-0.008458485, 0.559754, -0.1359481, -0.20492737, 0.010565355, 0.07826572, -0.04546082, -0.06438555, 0.044124458, 0.015091466, -0.039031394, 0.024674136, -0.015993683, -0.046678152, 0.038969137, 0.08587786) * inp_0_0_0; result3 += M4(-0.12672317, 0.7919048, 0.09465808, -0.28959075, 0.17336527, 0.2023873, -0.07387402, -0.09773626, 0.21337758, -0.035826, -0.023157721, -0.026020411, -0.35271186, -0.004095575, -0.043581124, 0.092579946) * inp_0_1_0; result3 += M4(-0.0762927, 0.6058523, 0.210049, -0.15095702, -0.12984839, 0.046832986, 0.003953526, -0.09747306, 0.07599791, 0.03548356, -0.052436605, 0.007820468, -0.013407755, 0.014487795, 0.04603643, 0.010825038) * inp_0_2_0; result3 += M4(-0.039665997, -0.035092063, -0.06810712, 0.108950034, -0.000649656, 0.13788638, -0.015922725, -0.0054936605, 0.017050518, -0.23598951, -0.0052512363, 0.035469808, -0.026771294, 0.17689878, 0.026165131, 0.060948808) * inp_0_0_1; result3 += M4(0.20632705, -0.16090605, -0.24408017, -0.053601943, -0.2055891, 0.5841354, -0.037936993, -0.042822365, 0.48433793, 0.15146664, -0.039210964, 
0.2029982, -0.5239856, -0.14422977, 0.07044831, -0.02499571) * inp_0_1_1; result3 += M4(-0.14769523, 0.030178303, 0.007047456, 0.10304811, 0.17505062, 0.3024603, 0.0046870145, -0.20263235, 0.09975853, 0.13236913, -0.11148928, 0.22456545, -0.2844659, -0.14753824, 0.21022335, -0.16320874) * inp_0_2_1; result3 += M4(-0.021687089, 0.013914326, 0.005674425, -0.02157544, 0.06648613, 0.13842705, 0.010751784, -0.0279543, 0.09411157, 0.043079033, -0.0016886615, 0.053833924, -0.047202144, -0.04031651, -0.006346014, -0.06269334) * inp_0_0_2; result3 += M4(0.010542784, -0.038660403, -0.027727835, 0.0442041, -0.058916762, 0.21794543, -0.0050698183, -0.03180731, 0.29513338, -0.21027178, -0.030230556, 0.12158278, -0.059518564, 0.16094229, 0.092587315, -0.17798163) * inp_0_1_2; result3 += M4(-0.01876096, -0.033373438, 0.013868456, -0.06437974, 0.10535872, 0.2320939, -0.041797537, -0.03412653, 0.042804, -0.019821629, 0.017664902, 0.02014612, -0.008958272, -0.04446397, -0.04071955, -0.08878301) * inp_0_2_2; const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0]; const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1]; const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2]; const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0]; const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1]; const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2]; const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0]; const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1]; const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.01583359, 0.06754533, -0.01712357, -0.03930749, 0.019515904, 0.0476419, -0.04839703, 0.099027, -0.02762645, -0.07761426, -0.14227375, 0.005808833, 0.062241603, 0.032665376, -0.10276744, 0.0037447736) * inp_1_0_0; result0 += M4(0.176867, 0.074107334, -0.16574392, -0.06600555, -0.17686988, 0.40425017, 0.5451527, 0.13584052, -0.17639911, -0.19718716, 0.2380433, 0.040593807, 0.10853866, 0.046141937, -0.2325924, 0.007987454) * 
inp_1_1_0; result0 += M4(0.0772586, 0.12861234, 0.08976759, 0.03701837, 0.19125976, 0.10426821, -0.040152878, -0.07515106, -0.14654006, -0.20254305, -0.13209806, 0.07523701, 0.03458956, -0.012815592, -0.028856799, 0.029668355) * inp_1_2_0; result0 += M4(0.019110488, 0.060465347, 0.12735324, 0.029926497, 0.020046815, 0.08939445, -0.010156248, -0.0047141416, 0.009727405, -0.030973857, -0.07694125, -0.0052544693, -0.0875518, -0.080452956, -0.05976568, -0.096559025) * inp_1_0_1; result0 += M4(0.42901465, 0.2590428, 0.13991387, 0.07037277, 0.36930072, 0.10277632, 0.35448986, 0.0075549292, 0.3952787, -0.24613738, 0.39837205, -0.077002235, 0.63838506, -0.36808252, 0.0059239357, 0.23111042) * inp_1_1_1; result0 += M4(0.02320368, 0.10004724, 0.0126293525, -0.090640485, -0.067141384, 0.019517098, -0.01910994, -0.08992004, -0.015479739, -0.039290965, 0.058866188, 0.023926063, -0.08454309, -0.10365931, 0.0992991, -0.01995878) * inp_1_2_1; result0 += M4(0.017662264, 0.048447046, -0.009959364, -0.04590219, 0.015489817, 0.07478854, 0.0035829754, -0.044818107, -0.020652944, 1.7518421e-06, 0.024685964, 0.012436587, -0.009526269, -0.0948704, 0.23454551, -0.00925439) * inp_1_0_2; result0 += M4(0.10283008, -0.003436092, 0.12427426, 0.086562015, 0.204189, -0.042327147, 0.064794585, 0.015408602, 0.07648754, 0.08140967, 0.14045514, -0.025799247, -0.04064487, -0.2135315, 0.069888115, -0.020488435) * inp_1_1_2; result0 += M4(0.062685005, -0.0087055005, -0.049382843, -0.014352606, 0.028499393, -0.027238736, 0.09515428, -0.023900118, -0.071811944, -0.03655884, -0.04257451, -0.010197308, 0.029228672, -0.005827241, -0.12256195, -0.029949382) * inp_1_2_2; result1 += M4(-0.07721696, 0.019634021, -0.065468505, -0.0538411, 0.15386713, 0.105303764, -0.05682918, 0.014476583, 0.21499005, 0.09528613, -0.035307407, -0.0109019205, -0.017080473, 0.06655509, -0.14565326, 0.021859277) * inp_1_0_0; result1 += M4(-0.08070917, 0.014726409, 0.07630219, -0.1974782, -0.045234982, -0.10786042, -0.20062084, 
0.00798719, 0.2289415, 0.15024285, -0.24757579, -0.11945095, -0.06408308, 0.023932865, 0.13997374, -0.08553081) * inp_1_1_0; result1 += M4(-0.103800066, -0.0119613, -0.0497909, -0.13598542, -0.090290286, 0.13484626, 0.29374337, -0.085325494, 0.13760795, 0.09128403, 0.10423313, -0.022034246, 0.0075501916, 0.033313587, 0.08877397, -0.06258862) * inp_1_2_0; result1 += M4(-0.19667187, 0.07910811, 0.14975567, 0.11546245, -0.007856336, 0.05762079, -0.026586441, -0.047359627, 0.0952913, -0.0048787566, 0.009396981, 0.07306324, -0.05333312, -0.092696056, 0.1484569, -0.014510268) * inp_1_0_1; result1 += M4(-0.4483713, -0.08914751, -0.06830535, 0.22022808, -0.02908167, 0.19700284, -0.23201343, 0.100948244, 0.10150403, 0.36964548, 0.3682474, 0.38162547, -0.07337941, 0.32787672, 0.22884098, -0.17473112) * inp_1_1_1; result1 += M4(-0.11851422, -0.1997371, -0.09265734, -0.068350844, 0.027532142, 0.0598685, 0.20098667, 0.19505432, -0.0071426006, 0.122263566, 0.09087172, 0.14061768, -0.068963185, -0.22508137, -0.27521184, 0.05226415) * inp_1_2_1; result1 += M4(-0.08752191, -0.035636704, -0.026970783, -0.0063899173, -0.06535668, -0.0097355945, 0.0016719002, -0.028427562, 0.04458897, -0.008972722, 0.0011736943, -0.040193666, 0.06178452, 0.09981747, 0.13165061, 0.066930525) * inp_1_0_2; result1 += M4(-0.08367123, -0.05284521, 0.027751574, 0.05405288, -0.013956896, 0.13211827, 0.02718628, 0.04126972, 0.013591151, 0.05696284, 0.021740496, -0.06569428, -0.13156594, -0.2217979, 0.12002505, 0.081010014) * inp_1_1_2; result1 += M4(-0.09057673, -0.065086775, 0.052175682, 0.040668074, -0.0011252841, 0.033768404, 0.055707283, 0.1775509, 0.024380935, -0.0029724136, 0.011439569, -0.036639195, -0.034154296, -0.040933546, -0.04963123, -0.27177313) * inp_1_2_2; result2 += M4(-0.07619493, 0.015114808, 0.12194339, -0.05388546, 0.122937895, 0.06766423, 0.08985467, -0.0026659921, 0.16911712, -0.12348506, 0.10205851, 0.0040722685, -0.08778425, -0.04494511, 0.13364732, -0.05704994) * inp_1_0_0; result2 
+= M4(0.067183, -0.08808697, 0.061798133, 0.026947606, -0.2520032, 0.27808714, -0.12131742, -0.073309034, 0.10515911, -0.05996287, -0.10040219, -0.3855947, 0.09427887, -0.040533386, -0.11460287, -0.11597825) * inp_1_1_0; result2 += M4(-0.19422445, -0.044388153, 0.030549884, -0.022434473, -0.052113403, -0.03193732, 0.058239404, -0.095854744, 0.20687638, 0.091036074, 0.040095903, -0.12101788, 0.029437194, -0.009060895, -0.091376856, -0.08326711) * inp_1_2_0; result2 += M4(-0.033702653, 0.062329434, 0.056782924, 0.1510267, 0.10268749, -0.0012141394, 0.028026294, 0.06589578, 0.07167968, 0.0382477, -0.036623612, -0.05445876, 0.061408594, -0.007828243, 0.08684674, 0.061775405) * inp_1_0_1; result2 += M4(0.18673033, -0.010594418, 0.11519751, -0.015496185, -0.060910117, 0.17213313, 0.10663717, -0.23498611, -0.11552151, 0.12067305, -0.11170714, -0.006541151, 0.41895327, -0.13074741, -0.12857904, -0.28355864) * inp_1_1_1; result2 += M4(-0.16524398, -0.055524822, -0.0810091, -0.029351775, 0.05192384, -0.032889236, 0.038031064, -0.058420878, 0.061196484, 0.009064973, 0.0010509911, 0.03512171, -0.15589699, -0.004647887, 0.021679884, 0.044907816) * inp_1_2_1; result2 += M4(-0.044117562, -0.019335665, 0.03378335, -0.027249482, -0.055734895, -0.007945039, -0.0096593145, -0.043825258, 0.00033283542, 0.00740868, -0.008989796, 0.011453801, -0.0032866534, -0.03677149, 0.08357763, 0.13552722) * inp_1_0_2; result2 += M4(-0.056846637, 0.06251862, 0.10164459, -0.049285304, 0.013761074, 0.044591248, 0.08373373, 0.05985056, -0.08572338, -0.023269488, 0.038841605, -0.015587152, 0.021170331, 0.11302778, 0.11370839, -0.32551986) * inp_1_1_2; result2 += M4(-0.05821609, 0.004350518, -0.047714118, -0.026734909, 0.010570952, 0.0018678942, -0.017686881, 0.03402182, 0.030670093, 0.0034491834, 0.046588063, 0.023750719, -0.018057328, 0.01436053, -0.016733939, -0.0806178) * inp_1_2_2; result3 += M4(0.024148023, -0.056969013, -0.007182248, -0.06978146, -0.18521935, 0.049342513, -0.1059268, -0.010528267, 
-0.13987224, -0.046478126, 0.011630029, 0.03975061, 0.028102472, -0.015797462, 0.041665286, -0.052644428) * inp_1_0_0; result3 += M4(0.23887786, -0.03063249, -0.0078508025, -0.110535435, -0.47775343, 0.31345952, 0.037840076, 0.1917282, -0.4675718, 0.18246669, -0.071136154, 0.07569014, 0.3363767, -0.05939815, 0.03330824, -0.1149139) * inp_1_1_0; result3 += M4(-0.03199022, -0.027461693, -0.017600277, -0.01040416, -0.3335923, -0.16069661, -0.0011826943, -0.14604628, -0.10750828, -0.031318568, -0.11515191, 0.14961019, 0.01875921, -0.05859786, 0.0062334435, -0.026974274) * inp_1_2_0; result3 += M4(0.16624868, 0.08622376, 0.013066398, -0.013561043, -0.14781322, 0.06166629, -0.01690182, 0.03240959, -0.19101089, -0.23269162, 0.023481557, 0.039373416, 0.10989115, -0.33205244, 0.06563763, -0.03763514) * inp_1_0_1; result3 += M4(0.763446, -0.36806998, -0.054857958, -0.054847606, -0.6365203, 0.124305926, -0.11834348, 0.012177892, -0.41701862, -0.12154834, 0.19443808, 0.23409277, 0.74961615, -0.14717795, 0.08279371, 0.010061856) * inp_1_1_1; result3 += M4(0.3890335, 0.11852343, 0.06878882, -0.106771834, 0.0013103477, 0.036700066, 0.061958365, -0.23648676, -0.13740712, -0.031022822, -0.005026855, -0.0040705674, 0.3481052, 0.21652177, 0.13818942, 0.14510764) * inp_1_2_1; result3 += M4(0.024962775, -0.00930012, 0.016999796, -0.007042678, -0.049675427, 0.0561294, 0.010110364, -0.020104202, -0.027516196, 0.02439481, -0.018409938, -0.015501763, 0.07540284, 0.080506206, 0.0070816637, 0.019382445) * inp_1_0_2; result3 += M4(0.18482067, 0.02875042, -0.028305996, 0.056233536, -0.25161085, -0.07676187, -0.030247726, 0.031174812, -0.16452968, 0.17400448, -0.05394027, -0.11061602, 0.5713144, -0.07015428, 0.0838255, 0.13299061) * inp_1_1_2; result3 += M4(0.066211306, 0.086373925, 0.025565708, -0.018157946, 0.023741372, 0.08362605, 0.034379553, 0.005279233, -0.03009191, -0.006536514, -0.0043627736, -0.026205579, 0.032223664, -0.11968535, 0.0042308527, -0.02536624) * inp_1_2_2; const V4 
inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0]; const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1]; const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2]; const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0]; const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1]; const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2]; const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0]; const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1]; const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.020806601, 0.0011769276, -0.06854966, 0.06404834, 0.010257402, 0.09078626, -0.09029915, -0.038121767, -0.08009543, 0.024868483, 0.33892182, 0.02145583, 0.054771744, 0.049150817, -0.011070969, -0.028411673) * inp_2_0_0; result0 += M4(-0.038379848, -0.008772784, 0.029454285, 0.043145403, -0.028629266, -0.008691371, -0.040970787, -0.06477745, -0.1791925, 0.003425941, -0.14729068, 0.16579328, 0.13128689, -0.002635605, -0.13834825, 0.05271074) * inp_2_1_0; result0 += M4(-0.091530494, -0.014356574, 0.03452029, 0.058324084, -0.047370777, 0.042062152, 0.052640617, 0.08313879, 0.011118099, -0.019573802, -0.05845368, -0.027856722, 0.1666636, 0.10776295, -0.053498384, -0.035426028) * inp_2_2_0; result0 += M4(-0.07106617, -0.12926805, 0.11532138, -0.025411379, -0.02660926, -0.011154649, -0.0171705, 0.013136738, -0.65104634, 0.24976598, 0.12305868, -0.6479581, 0.24736935, -0.21200751, 0.08362573, -0.07467725) * inp_2_0_1; result0 += M4(-0.033144828, -0.2901839, 0.19474508, -0.018116087, -0.0337689, -0.0077542337, 0.022090381, 0.011201993, 0.0003965253, 0.10669989, -0.06420536, 0.21434414, -0.025104653, 0.25331414, -0.094278105, 0.015236804) * inp_2_1_1; result0 += M4(0.054170903, -0.03878034, 0.023502445, -0.04763652, 0.0436261, 0.02125075, -0.07865895, 0.008801445, -0.03805679, 0.017889298, 0.08710609, 0.028471842, 0.1409927, 0.24610753, 0.18868911, -0.0038281744) * inp_2_2_1; result0 += M4(-0.024401257, 0.029200602, -0.037531514, 
0.020089343, 0.0031562252, -0.015761279, 0.08060079, 0.07492363, -0.75374854, 0.069819294, -0.6806557, -0.22842103, -0.01784711, -0.0015924854, -0.02660206, 0.06682451) * inp_2_0_2; result0 += M4(-0.18408458, 0.04495705, -0.27017567, -0.04060905, -0.060091477, 0.04199921, 0.042240314, -0.05247994, 0.01244936, -0.11446392, 0.012699051, 0.19231313, 0.16377899, -0.0021420936, 0.20178908, 0.022177307) * inp_2_1_2; result0 += M4(-0.076426975, 0.060788747, -0.12581491, -0.026730899, 0.078445174, 0.031476885, -0.018560112, -0.027070913, 0.032900266, -0.012133513, -0.06886763, 0.048137616, 0.32230917, -0.11296064, 0.15351215, -0.010818085) * inp_2_2_2; result1 += M4(0.06736406, 0.075990364, 0.0043325764, 0.043084458, 0.066263326, 0.08283939, 0.01714942, -0.043197032, -0.034201793, -0.19740304, -0.028645728, -0.42067948, -0.17401718, -0.10356778, -0.21782434, -0.1368192) * inp_2_0_0; result1 += M4(0.121906675, 0.040143315, -0.06954, 0.029039154, -0.064667046, 0.00652776, -0.016123066, -0.042725384, 0.076855555, 0.10874429, 0.057248183, 0.1804212, -0.024840739, -0.001273071, 0.20704693, -0.25340644) * inp_2_1_0; result1 += M4(0.053181823, 0.010691048, -0.041441515, -0.020986209, 0.07976909, -0.08821121, -0.028313044, 0.08507492, -0.015487181, -0.010594703, 0.04398618, 0.10098527, -0.10889402, -0.0034516156, 0.08802651, 0.039294768) * inp_2_2_0; result1 += M4(0.122033805, 0.02009872, 0.11733401, -0.01198582, 0.0035701713, 0.085737005, 0.0051125716, -0.08456334, 0.121365964, -0.4063034, -0.073627055, -0.8934959, 0.07101478, -0.0012460023, -0.13605523, 0.4156497) * inp_2_0_1; result1 += M4(0.09683759, -0.22955927, 0.10484432, -0.26306942, -0.084963664, -0.054211535, 0.0077070566, -0.030115092, 0.05744336, 0.25544357, -0.22682056, -0.22863898, 0.034472905, 0.31963208, -0.19928747, 0.9492331) * inp_2_1_1; result1 += M4(0.070213765, -0.0038610063, 0.0033997372, 0.06119504, -0.047582276, -0.09507793, -0.083675615, 0.08001755, 0.040334526, 0.037215024, -0.068379946, -0.018164538, 
-0.0459578, 0.09890098, -0.13228115, -0.16900617) * inp_2_2_1; result1 += M4(-0.036349338, 0.03735424, -0.013125493, -0.04058556, 0.04095882, -0.011006372, -0.09501445, 0.009363677, -0.13656218, -0.39543343, -0.08865905, -1.3125815, 0.09805664, -0.108527824, -0.039054662, -0.06415699) * inp_2_0_2; result1 += M4(0.13630877, -0.095158935, -0.10042646, -0.3309934, 0.010858195, -0.07095434, 0.045891337, -0.067784145, 0.009857397, 0.14819881, 0.054163657, 0.04177505, -0.15756398, 0.24096744, 0.013504479, 0.39090788) * inp_2_1_2; result1 += M4(0.0065320977, -0.014341278, -0.023192916, -0.05885387, -0.046773493, -0.035404146, 0.0049495073, -0.04104518, -0.0531538, -0.012901377, 0.077806786, -0.05745816, -0.21440023, -0.019557446, 0.20342024, 0.27739877) * inp_2_2_2; result2 += M4(0.057360508, 0.014503564, 0.059143994, 0.010198468, -0.02674482, 0.0022251438, 0.06575039, -0.008726433, -0.09188913, -0.12177862, 0.2584525, 0.1557489, -0.33377033, -0.037951965, 0.1340623, -0.07428824) * inp_2_0_0; result2 += M4(0.15755674, -0.027416268, 0.031757925, 0.030080441, -0.092128694, -0.05288516, 0.07390179, 0.070589304, 0.042379964, 0.05825198, 0.097974725, -0.07958051, 0.026712047, 0.021791503, -0.19539036, -0.13505116) * inp_2_1_0; result2 += M4(0.06732846, 0.02477517, 0.047421448, -0.0350228, -0.026210636, -0.07957982, 0.06680819, 0.090554416, 0.003016353, 0.002736857, -0.06721213, 0.016317641, -0.1511062, -0.03932932, -0.0411404, 0.037391737) * inp_2_2_0; result2 += M4(0.044398054, -0.0039024032, -0.039489146, 0.015305246, 0.085481696, -0.031782832, -0.048250176, -0.04576736, 0.056205854, -0.34265697, 0.3497672, 0.015577426, -0.053595997, -0.056706756, -0.110979, 0.0770339) * inp_2_0_1; result2 += M4(-0.17363448, 0.109652765, 0.194541, -0.1697986, 0.0554414, 0.027986523, 0.09817557, 0.08573731, -0.13382511, -0.059311867, -0.3453238, 0.2896559, 0.62732804, 0.21806855, -0.0051348326, -0.074557506) * inp_2_1_1; result2 += M4(-0.0042801355, -0.0034563097, -0.012686132, 0.13073713, 
-0.033048425, 0.08372879, -0.091064066, -0.085343346, 0.033698123, 0.0035512336, -0.015234775, 0.026158987, -0.022497516, -0.04325933, -0.08212852, -0.010965832) * inp_2_2_1; result2 += M4(0.01627358, 0.0012432971, 0.03353438, 0.020683315, 0.025408963, -0.059527367, 0.0027960425, 0.027919134, -0.09517613, -0.38686842, 0.17250046, 0.2563248, -0.031954054, -0.032314193, -0.073108226, 0.002805501) * inp_2_0_2; result2 += M4(0.11107625, 0.0020363845, -0.060272653, 0.17112932, -0.060560506, 0.08849239, -0.042447433, 0.04453629, -0.19810289, -0.030767849, 0.054445386, -0.36024103, -0.043574344, -0.0359943, 0.08719419, 0.11635041) * inp_2_1_2; result2 += M4(0.01508289, -0.03601694, -0.04689985, 0.06605909, 0.08663423, -0.019983027, 0.04276954, -0.05810252, -0.049331795, -0.011382104, 0.047108665, -0.035963446, -0.15027761, 0.04916842, 0.046085328, -0.3091649) * inp_2_2_2; result3 += M4(-0.01954963, -0.05955776, 0.0366114, 0.03241877, 0.0811955, -0.08231828, -0.017435858, -0.08624713, -0.19084726, -0.00130446, -0.0041784286, 0.36175868, 0.07715551, -0.020533009, 0.027846111, -0.05263875) * inp_2_0_0; result3 += M4(0.038249742, 0.05213642, 0.022535365, 0.07490527, 0.08845503, -0.0865709, -0.06229371, 0.043499216, -0.034865715, 0.00839816, -0.10632363, 0.15028821, 0.22725908, -0.21485256, 0.059532907, -0.100963406) * inp_2_1_0; result3 += M4(0.036063094, 0.0029133533, -0.030805418, 0.07174674, -0.02426538, -0.051893298, 0.10046242, 0.05046725, 0.021173801, 0.032548554, 0.035410833, -0.017246148, -0.047212694, -0.0019793957, 0.05575738, -0.07261234) * inp_2_2_0; result3 += M4(-0.01674299, -0.008884916, 0.010768661, -0.01639755, -0.08142729, -0.0053191427, -0.036633592, 0.07607754, -0.7825981, -0.43916854, 0.038573936, 0.37393776, -0.08752398, 0.12226086, -0.0039705704, -0.10482764) * inp_2_0_1; result3 += M4(-0.16088066, -0.121415704, 0.07007931, 0.1544625, -0.08467529, 0.018081093, 0.049794823, 0.04264284, 0.16043149, -0.032744974, -0.23247649, 0.19149105, 0.22916554, 
0.4088795, -0.22852364, 0.07474784) * inp_2_1_1; result3 += M4(-0.09823339, -0.04523368, 0.13438235, 0.05503274, 0.094729334, 0.025204461, -0.010783659, 0.040687785, -0.0020735054, -0.035414696, -0.028707825, 0.009774318, 0.13749972, -0.007978513, -0.10437224, -0.16205452) * inp_2_2_1; result3 += M4(0.03452105, -0.0019801871, -0.008515022, 0.042908445, 0.023343097, -0.039268777, -0.05582678, -0.01785084, 0.1909139, 0.30671456, -0.09177103, -0.18535422, 0.03572424, 0.096279085, -0.052728545, -0.0008020096) * inp_2_0_2; result3 += M4(0.04167784, -0.09262971, -0.034826137, -0.04459923, -0.034247838, -0.087677866, -0.077725075, -0.059959635, -0.083735585, -0.08568067, 0.0026664583, 0.38268802, -0.21485022, -0.18185435, 0.031202722, 0.118082285) * inp_2_1_2; result3 += M4(-0.01308469, -0.039964344, 0.02419818, -0.051316794, -0.045073602, -0.07852878, -0.0641165, 0.10274802, -0.03722715, 0.016585138, -0.01557483, 0.054411575, 0.058821086, 0.17634773, 0.09275543, 0.16511928) * inp_2_2_2; const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0]; const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1]; const V4 inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2]; const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0]; const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1]; const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2]; const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0]; const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1]; const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.16157526, 0.0708418, -0.17742826, 0.037568346, 0.1363335, -0.039198983, -0.12991804, 0.12523568, -0.0315292, 0.066681534, 0.019297564, -0.045476478, 0.050666958, 0.0070537156, -0.0741803, 0.043249052) * inp_3_0_0; result0 += M4(-0.09018603, -0.049359206, 0.18229158, -0.014263066, -0.26950583, -0.22213577, 0.18419883, 0.07491064, 0.107961394, 0.21241283, -0.011421013, 0.004667217, -0.136152, -0.13793777, 0.17264098, -0.025761813) * 
inp_3_1_0; result0 += M4(0.010441085, 0.034229126, 0.10410884, 0.0007140697, -0.035234306, -0.20281972, -0.08954763, -0.04823551, -0.024772387, 0.11774257, 0.08591761, 0.03018245, -0.08788433, 0.018554667, -0.034182277, 0.059270818) * inp_3_2_0; result0 += M4(-0.046677265, 0.17860901, 0.226423, -0.004706376, 0.19468129, -0.20464435, 0.2005496, -0.08843481, -0.17709897, 0.21608044, -0.09500188, -0.0014078907, -0.0823845, -0.008207487, 0.11033482, 0.039437946) * inp_3_0_1; result0 += M4(0.39077276, -0.63461953, 0.1270757, -0.0005488689, -0.35906503, -0.65365374, -0.42657968, -0.28447995, 0.3077801, 0.5133355, 0.31003663, 0.22172691, -0.3231345, -0.11029882, -0.25303665, 0.17490047) * inp_3_1_1; result0 += M4(-0.2731292, 0.000310414, -0.15897174, 0.058956403, -0.09936814, -0.07022474, 0.08290902, 0.07600689, 0.17877856, 0.10775786, -0.04695605, -0.03202572, -0.23459665, -0.69781375, -0.27533895, -0.15403226) * inp_3_2_1; result0 += M4(-0.042978965, -0.054180793, 0.1954828, -0.08684577, -0.13747977, -0.20742784, 0.18355459, 0.11388488, 0.11764892, 0.069994465, -0.21684092, -0.051016156, 0.033618942, -0.036646932, -0.1297882, 0.023407396) * inp_3_0_2; result0 += M4(0.20020427, -0.22712614, 0.10650477, -0.005730161, -0.24823771, -0.1734196, -0.27462947, -0.0036915976, 0.17370144, 0.11744064, 0.21179977, -0.044981353, -0.0793401, 0.08837848, 0.09162515, 0.030376406) * inp_3_1_2; result0 += M4(-0.1309085, 0.015177464, 0.017351935, -0.006943054, 0.20613031, -0.07015381, 0.12872961, 0.055942796, -0.010101874, 0.054767746, 0.073140666, -0.003368886, -0.03311943, 0.018623969, -0.1904025, -0.07601106) * inp_3_2_2; result1 += M4(0.09109263, 0.24044237, -0.07368503, 0.09128383, 0.08638873, 0.066008195, 0.017524574, 0.14434199, -0.05134559, 0.016527405, 0.028160872, -0.009478669, -0.021720985, -0.0031434142, 0.055661436, 0.011320767) * inp_3_0_0; result1 += M4(0.03140381, -0.12361954, -0.2105949, -0.05841093, 0.20608658, -0.0043003242, -0.32436752, 0.108572416, 0.0137622375, 
0.123948485, 0.08976507, -0.0005934278, 0.19517872, -0.09666256, 0.27647632, 0.10041602) * inp_3_1_0; result1 += M4(-0.00084548816, -0.029597878, -0.069973126, -0.15951297, 0.09769574, -0.059322637, -0.034474894, 0.05295799, 0.035118338, 0.07830424, -0.010667201, -0.04336969, 0.12945734, 0.1678381, 0.23533249, 0.19315714) * inp_3_2_0; result1 += M4(0.09013079, 0.15298937, 0.009327142, -0.33564213, 0.37823164, 0.02959889, -0.20432287, 0.045236036, -0.2741461, -0.046541527, 0.20261943, -0.10205416, 0.14046308, -0.0027450805, 0.21283917, 0.06460282) * inp_3_0_1; result1 += M4(-0.18491924, -0.21789634, 0.15175304, -0.887086, 0.71517575, 0.22929609, 0.1805253, 0.11580503, -0.68914616, -0.1291275, -0.012994776, -0.47681156, 0.4985886, 0.004125866, 0.11798174, -0.68233013) * inp_3_1_1; result1 += M4(0.07321497, -0.14313735, -0.105816804, -0.080761194, -0.06267719, 0.044976983, -0.042560253, -0.074202426, -0.112595245, -0.0060367524, 0.06645215, 0.022692902, 0.45074677, -0.31013855, 0.018929806, -0.080830745) * inp_3_2_1; result1 += M4(-0.1182609, 0.15505335, 0.14059931, -0.010010356, 0.2792563, 0.021148562, 0.21544349, -0.09246091, -0.19237362, -0.019413901, -0.19110642, 0.15954247, -0.03959726, 0.038067605, -0.06587343, 0.10105769) * inp_3_0_2; result1 += M4(0.086048566, -0.03875393, -0.010399113, -0.087426275, 0.06872017, -0.06801751, -0.1554103, -0.046588384, -0.072060585, 0.17145237, 0.16459806, 0.2941966, 0.24843049, 0.10976179, 0.0616643, 0.028753892) * inp_3_1_2; result1 += M4(0.14644687, 0.19136189, -0.012354268, -0.053453315, -0.118824765, 0.007956001, 0.14581923, 0.120377615, 0.08550302, 0.10488886, -0.049412593, 0.04564864, 0.041706, -0.22091928, -0.18077254, -0.07200271) * inp_3_2_2; result2 += M4(0.12742214, -0.019949354, 0.22852126, 0.009229048, 0.07179215, -0.035888027, -0.06847325, 0.075466916, 0.05451231, 0.04447139, -0.007379507, -0.038457163, 0.0069804247, -0.022739582, 0.015290603, 0.058539122) * inp_3_0_0; result2 += M4(0.033414926, 0.0688454, 
-0.01154317, 0.04373435, 0.02220928, 0.07399258, -0.018638128, -0.09049976, 0.14270648, -0.04456102, 0.09852033, 0.095695525, 0.048654117, 0.03194091, -0.2506061, 0.22547816) * inp_3_1_0; result2 += M4(-0.029927028, -0.018085415, 0.19878025, 0.0600148, 0.15179572, 0.043493457, -0.08094291, 0.05803879, -0.010909439, -0.014285486, 0.15604655, 0.008313019, 0.19220084, -0.024658643, -0.12223889, -0.044626527) * inp_3_2_0; result2 += M4(0.002232593, -0.117187135, 0.271499, -0.1504283, -0.12024035, -0.15202788, 0.07508506, -0.069154665, 0.14663363, 0.10095183, -0.079744, -0.027517907, 0.12171369, 0.04201386, -0.18390292, -0.08800392) * inp_3_0_1; result2 += M4(-0.39752677, -0.106147856, 0.31427163, -0.6602043, 0.36713195, 0.1565504, 0.659634, 0.054854617, -0.38186052, -0.14849813, -0.40023437, -0.28087738, -0.28679487, 0.10423841, 0.03607786, -0.32230294) * inp_3_1_1; result2 += M4(0.075212725, 0.05711698, 0.07420369, -0.15664868, 0.1286679, 0.12669285, 0.25749606, -0.3583118, -0.10613531, -0.07323004, -0.27757177, 0.08621346, 0.1296691, -0.031802166, 0.07022488, 0.22773924) * inp_3_2_1; result2 += M4(-0.068505526, -0.026667738, 0.12101492, 0.070010714, 0.039327346, -0.05554568, -0.10054333, 0.18232317, 0.09161801, 0.073988214, 0.07982679, -0.101606384, 0.1132725, 0.05208842, 0.033680618, 0.0011496624) * inp_3_0_2; result2 += M4(0.077517726, 0.10946592, 0.118573286, -0.19028679, 0.09755006, -0.044249807, -0.15750827, 0.061594304, -0.034222055, 0.0128054945, -0.03952524, -0.102706864, 0.25461313, 0.10715616, -0.14800839, 0.18352498) * inp_3_1_2; result2 += M4(0.112083696, -0.026794339, 0.0014270544, 0.2073605, -0.11959005, 0.03181029, 0.07840882, -0.18645816, 0.03743027, -0.03183679, -0.043347996, 0.13866818, 0.12204855, 0.03666218, -0.121538706, 0.057100873) * inp_3_2_2; result3 += M4(-0.10662995, 0.05697544, -0.10548284, -0.11210752, 0.047840722, 0.123951785, 0.009479878, 0.078141965, 0.011802129, -0.0045969826, -0.022849994, -0.067880586, 0.042395536, -0.0083619, 
-0.008202702, 0.06660749) * inp_3_0_0; result3 += M4(0.19675933, 0.052384473, 0.013265036, 0.098756306, -0.018376395, -0.07318726, 0.06631346, 0.10931116, -0.07211753, 0.08561086, -0.058432173, -0.018883526, -0.2426171, -0.10608711, -7.214139e-05, 0.07174066) * inp_3_1_0; result3 += M4(-0.10734781, -0.029677497, -0.0359158, 0.019730724, 0.12103277, 0.050042972, 0.047522116, 0.037930418, -0.12674676, -0.085192285, -0.10322901, -0.028011028, -0.23943996, -0.08027147, 0.035006125, -0.05514018) * inp_3_2_0; result3 += M4(-0.19616649, 0.15140127, -0.09350769, -0.12153282, -0.2692576, 0.17027298, -0.09067922, -0.099699005, 0.23312636, -0.08758269, 0.0017002, 0.01240649, 0.011466379, -0.07665326, -0.009166553, 0.1272778) * inp_3_0_1; result3 += M4(-0.10554337, -0.28817138, 0.039271317, 0.041808035, -0.44556034, 0.76822776, -0.30355525, 0.2162305, 0.33683625, -0.52597344, 0.24986169, -0.14621596, 0.18584943, -0.18752898, 0.1542312, -0.032974835) * inp_3_1_1; result3 += M4(0.16487968, 0.07279231, 0.03802793, 0.10356195, -0.07914432, 0.060889, -0.23647681, 0.08861426, 0.14644763, 0.039211847, 0.08409845, -0.13739991, 0.021675752, -0.1462796, 0.20126085, 0.04228552) * inp_3_2_1; result3 += M4(0.04344281, 0.1361205, 0.043024268, 0.022378847, 0.14772795, 0.25472307, -0.06886387, 0.036575425, -0.10773756, -0.46079612, 0.0655881, 0.045242745, -0.011777071, -0.31816006, 0.03709428, 0.083959974) * inp_3_0_2; result3 += M4(-0.105607435, 0.0121534355, -0.076223634, 0.15062469, 0.010016192, -0.03524558, 0.045847397, 0.061889775, 0.016607437, -0.05810039, 0.09952769, -0.03271708, 0.10818878, -0.1336954, 0.0047579217, -0.118375205) * inp_3_1_2; result3 += M4(-0.14107881, -0.1570039, -0.0175814, -0.0017515017, 0.029902417, 0.11668936, 0.00097337883, 0.14668427, -0.11713396, -0.12998298, -0.0028004094, -0.07370321, 0.014416133, -0.06064584, -0.014360191, -0.18469594) * inp_3_2_2; const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(2, 2); imageStore(out_image, output_base + 
ivec2(0, 0), result0); imageStore(out_image, output_base + ivec2(1, 0), result1); imageStore(out_image, output_base + ivec2(0, 1), result2); imageStore(out_image, output_base + ivec2(1, 1), result3); } //!DESC ArtCNN C4F16 (Conv2D-6) //!COMPUTE 12 16 12 16 //!HOOK LUMA //!BIND conv2d //!BIND conv2d_5 //!SAVE conv2d_6 //!WIDTH LUMA.w 1.0 * //!HEIGHT LUMA.h 1.0 * //!COMPONENTS 4 //!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > * #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable #ifdef GL_EXT_shader_explicit_arithmetic_types_float16 # define V4 f16vec4 # define M4 f16mat4 # define F float16_t #else # define V4 vec4 # define M4 mat4 # define F float #endif const ivec2 ksize = ivec2(3, 3); const ivec2 offset = ksize / 2; const ivec2 wg_size = ivec2(gl_WorkGroupSize); const ivec2 isize = wg_size + ksize - 1; shared V4 inp[4][isize.y][isize.x]; void hook() { const uvec2 local_xy = gl_LocalInvocationID.xy; ivec2 base = ivec2(gl_WorkGroupID) * wg_size; for (uint y = local_xy.y; y < isize.y; y += wg_size.y) { for (uint x = local_xy.x; x < isize.x; x += wg_size.x) { const ivec2 input_base = (base + ivec2(x,y) - offset) * ivec2(2, 2); inp[0][y][x] = V4(conv2d_5_mul * texelFetch(conv2d_5_raw, input_base + ivec2(0, 0), 0) + conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(0, 0), 0)); inp[1][y][x] = V4(conv2d_5_mul * texelFetch(conv2d_5_raw, input_base + ivec2(1, 0), 0) + conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(1, 0), 0)); inp[2][y][x] = V4(conv2d_5_mul * texelFetch(conv2d_5_raw, input_base + ivec2(0, 1), 0) + conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(0, 1), 0)); inp[3][y][x] = V4(conv2d_5_mul * texelFetch(conv2d_5_raw, input_base + ivec2(1, 1), 0) + conv2d_mul * texelFetch(conv2d_raw, input_base + ivec2(1, 1), 0)); } } barrier(); V4 result0 = V4(0.08412104, 0.07971554, 0.08434169, 0.076244); const V4 inp_0_0_0 = inp[0][local_xy.y + 0][local_xy.x + 0]; const V4 inp_0_1_0 = inp[0][local_xy.y + 0][local_xy.x + 1]; const V4 
inp_0_2_0 = inp[0][local_xy.y + 0][local_xy.x + 2]; const V4 inp_0_0_1 = inp[0][local_xy.y + 1][local_xy.x + 0]; const V4 inp_0_1_1 = inp[0][local_xy.y + 1][local_xy.x + 1]; const V4 inp_0_2_1 = inp[0][local_xy.y + 1][local_xy.x + 2]; const V4 inp_0_0_2 = inp[0][local_xy.y + 2][local_xy.x + 0]; const V4 inp_0_1_2 = inp[0][local_xy.y + 2][local_xy.x + 1]; const V4 inp_0_2_2 = inp[0][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.079171985, -0.0069937697, -0.0210708, -0.033105504, -0.023817735, -0.011238646, -0.043035787, -0.02440147, 0.016872514, 0.03719572, -0.0036019788, 0.012509292, 0.1484504, 0.10084735, 0.06727001, 0.09289472) * inp_0_0_0; result0 += M4(-0.090545304, -0.16164127, 0.019691145, 0.045617487, 0.0029133197, -0.011457896, -0.1087185, -0.022287082, 0.007693777, -0.1557534, 0.119432844, 0.048796117, 0.01886607, 0.13466214, 0.055788144, 0.011069647) * inp_0_1_0; result0 += M4(0.019721841, -0.04018983, -0.010603011, -0.041139323, -0.013923174, -0.0069041494, -0.03181753, -0.07459869, -0.032918666, 0.004894833, -0.01307685, 0.038587816, 0.092926815, 0.04549454, 0.06285106, 0.03428836) * inp_0_2_0; result0 += M4(0.1622716, -0.07768257, -0.013204322, 0.026763849, 0.0012736389, -0.09754597, 0.13953719, -0.07492267, 0.016014466, 0.055583894, 0.07073988, 0.007828915, 0.17086188, 0.109315015, 0.16700633, 0.119697236) * inp_0_0_1; result0 += M4(0.079182394, 0.2656691, -0.13238232, -0.17505214, -0.23513712, 0.16317949, -0.18635502, 0.3353578, -0.10498728, 0.011073413, -0.10075553, -0.17032194, 0.19368632, 0.19305302, 0.15070842, 0.23248452) * inp_0_1_1; result0 += M4(-0.002185805, 0.11091493, 0.051263936, 0.041181523, 0.062277913, -0.062091295, 0.03607194, -0.10375673, -0.010489046, -0.01102331, -0.041805126, 0.065893725, 0.15395595, 0.18233144, 0.15172338, 0.21679255) * inp_0_2_1; result0 += M4(-0.030508783, -0.004990037, 0.0329664, 0.010637486, 0.005623188, -0.003358061, 0.0080712475, 0.00046784236, 0.040540207, 0.004353267, 0.0534382, -0.010169058, 
0.10396432, 0.025561178, 0.18441953, 0.004519026) * inp_0_0_2; result0 += M4(-0.04412904, -0.07338797, 0.06732339, 0.061310094, 0.08977347, 0.04197968, 0.10037899, -0.03313243, 0.029916203, 0.021829529, -0.08954633, 0.08445129, 0.035676997, 0.06716942, 0.06353302, 0.1767009) * inp_0_1_2; result0 += M4(-0.016475134, -0.01597473, -0.006870764, 0.07254226, 0.052580874, 0.030486876, 0.078329995, 0.02920045, 0.024402283, 0.02916857, 0.008138189, -0.07570842, 0.061067346, 0.12795751, 0.07376389, 0.10487863) * inp_0_2_2; const V4 inp_1_0_0 = inp[1][local_xy.y + 0][local_xy.x + 0]; const V4 inp_1_1_0 = inp[1][local_xy.y + 0][local_xy.x + 1]; const V4 inp_1_2_0 = inp[1][local_xy.y + 0][local_xy.x + 2]; const V4 inp_1_0_1 = inp[1][local_xy.y + 1][local_xy.x + 0]; const V4 inp_1_1_1 = inp[1][local_xy.y + 1][local_xy.x + 1]; const V4 inp_1_2_1 = inp[1][local_xy.y + 1][local_xy.x + 2]; const V4 inp_1_0_2 = inp[1][local_xy.y + 2][local_xy.x + 0]; const V4 inp_1_1_2 = inp[1][local_xy.y + 2][local_xy.x + 1]; const V4 inp_1_2_2 = inp[1][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.10219679, -0.0016633151, -0.08416798, -0.06091891, -0.023262773, -0.126822, 0.0497739, -0.001825667, -0.03078599, 0.04718741, 0.0043902365, -0.0053807287, 0.032797694, 0.06849138, -0.02102451, 0.0029395863) * inp_1_0_0; result0 += M4(0.029171623, -0.119060196, -0.15731424, -0.010288389, 0.044039175, 0.03523954, 0.19108313, 0.0034680488, 0.022408625, -0.100109026, 0.013178398, 0.021233274, 0.02033905, -0.1076448, 0.05507044, 0.042807907) * inp_1_1_0; result0 += M4(-0.058946244, 0.07518282, -0.0218126, -0.008712151, -0.076766334, 0.05434428, -0.022394644, 0.11280125, -0.07020608, 0.064211406, -0.01466337, 0.04543824, 0.04098286, -0.05094307, 0.017451143, -0.015480388) * inp_1_2_0; result0 += M4(-0.050165318, -0.11767163, 0.037600912, -0.0783747, -0.04539349, -0.031150874, -0.08755638, -0.14933577, 0.07179043, -0.00836788, -0.08889254, 0.024585087, -0.08558388, 0.006760057, 0.05268061, 0.03279828) * 
inp_1_0_1; result0 += M4(0.194146, 0.026081745, 0.22553095, 0.055226117, 0.09558362, 0.056916, -0.14006428, 0.009533805, -0.048172764, 0.06694457, 0.016996864, 0.010374402, 0.1303292, -0.0043800916, -0.06218835, 0.15707439) * inp_1_1_1; result0 += M4(0.035268318, 0.11117023, -0.0024920474, 0.053387403, 0.0001125205, 0.03284727, -0.041436322, -0.11381764, 0.036874335, -0.05483726, 0.01669829, 0.047346413, 0.01582249, 0.024571301, 0.03841291, 0.0001531784) * inp_1_2_1; result0 += M4(-0.039766025, -0.013582949, -0.047067195, 0.019274479, -0.0064281444, -0.028889643, -0.03691507, -0.025142971, -0.009235927, -0.014399523, 0.0068894643, 0.018320393, -0.009533652, 0.0047507705, -0.051280286, -0.006059686) * inp_1_0_2; result0 += M4(-0.008069688, 0.06869032, 0.06720618, -0.039600242, 0.01939327, -0.04401731, 0.16278408, 0.050619956, -0.011940157, -9.242871e-06, -0.005613433, -0.06478905, -0.04764991, -0.00894034, -0.023979772, -0.14476714) * inp_1_1_2; result0 += M4(0.034411438, -0.023818584, 0.036871288, 0.0619627, -0.02687395, -0.024658777, -0.009806786, 0.098166026, 0.04545056, -0.0035166715, 0.04391233, -0.09520481, -0.022824388, -0.02932159, -0.019960575, -0.013126476) * inp_1_2_2; const V4 inp_2_0_0 = inp[2][local_xy.y + 0][local_xy.x + 0]; const V4 inp_2_1_0 = inp[2][local_xy.y + 0][local_xy.x + 1]; const V4 inp_2_2_0 = inp[2][local_xy.y + 0][local_xy.x + 2]; const V4 inp_2_0_1 = inp[2][local_xy.y + 1][local_xy.x + 0]; const V4 inp_2_1_1 = inp[2][local_xy.y + 1][local_xy.x + 1]; const V4 inp_2_2_1 = inp[2][local_xy.y + 1][local_xy.x + 2]; const V4 inp_2_0_2 = inp[2][local_xy.y + 2][local_xy.x + 0]; const V4 inp_2_1_2 = inp[2][local_xy.y + 2][local_xy.x + 1]; const V4 inp_2_2_2 = inp[2][local_xy.y + 2][local_xy.x + 2]; result0 += M4(0.042513143, 0.036193915, 0.009267334, 0.02318279, -0.08882127, -0.06790481, -0.025598435, -0.055786755, 0.003923015, 0.08367141, -0.02124261, -0.0036895827, 0.0665154, -0.061491456, -0.010962831, 0.016966973) * inp_2_0_0; result0 += 
M4(-0.08984092, 0.014150137, 0.0010094845, -0.0013944331, -0.05288092, 0.014116327, 0.035593804, 0.020797336, 0.04860784, -0.100140445, 0.043347888, 0.03942333, -0.06728835, 0.14273663, 0.026525293, -0.07449118) * inp_2_1_0; result0 += M4(0.037428003, -0.08130763, -0.010963262, -0.066873446, 0.05335851, 0.13654499, 0.032492008, 0.045801986, 0.009653935, 0.021689994, 0.025831793, 0.03277937, -0.025519112, 0.043301772, -0.0030657381, 0.046498533) * inp_2_2_0; result0 += M4(0.0417986, 0.08424531, 0.04000319, 0.09104747, -0.16142552, -0.021476395, -0.19967215, -0.037973598, -0.048555277, 0.032703213, -0.033633802, 0.10496307, 0.057205033, -0.055697214, 0.097497426, -0.08111985) * inp_2_0_1; result0 += M4(-0.061038256, -0.24523515, -0.3324259, -0.2994336, -0.41386676, -0.3934725, -0.2869577, -0.27902487, -0.053344678, -0.14278612, -0.15018024, -0.20823519, -0.034474645, -0.116564386, -0.0512445, 0.043638732) * inp_2_1_1; result0 += M4(-0.11793706, -0.024524925, -0.07900098, -0.06977263, 0.4436866, 0.095661, 0.44399175, 0.16541167, 0.017248677, 0.017089522, 0.0531066, 0.044330012, 0.0172881, 0.015262031, 0.06628099, 0.11591024) * inp_2_2_1; result0 += M4(0.034315176, 0.012758982, 0.098702334, 0.017511975, -0.06429503, 0.010301114, -0.1138796, 0.018430594, -0.011578993, 0.0050334428, 0.022777617, 0.036444314, -0.021219404, 0.022264466, -0.036809668, -0.015400894) * inp_2_0_2; result0 += M4(0.018624214, -0.04151733, 0.14313662, 0.023933941, 0.12591304, 0.046684355, -0.065231994, -0.067107506, 0.017609427, 0.10223146, 0.07743975, -0.12169869, 0.009158939, -0.025597682, -0.08834877, -0.079456866) * inp_2_1_2; result0 += M4(-0.0021108948, 0.014920775, 0.00642783, -0.022019196, 0.12250748, 0.1506987, 0.16710696, 0.15760566, -0.01257941, 0.0050026863, -0.00019434035, 0.066300645, -0.00047425524, 0.03707401, 0.001639517, 0.026377758) * inp_2_2_2; const V4 inp_3_0_0 = inp[3][local_xy.y + 0][local_xy.x + 0]; const V4 inp_3_1_0 = inp[3][local_xy.y + 0][local_xy.x + 1]; const V4 
inp_3_2_0 = inp[3][local_xy.y + 0][local_xy.x + 2]; const V4 inp_3_0_1 = inp[3][local_xy.y + 1][local_xy.x + 0]; const V4 inp_3_1_1 = inp[3][local_xy.y + 1][local_xy.x + 1]; const V4 inp_3_2_1 = inp[3][local_xy.y + 1][local_xy.x + 2]; const V4 inp_3_0_2 = inp[3][local_xy.y + 2][local_xy.x + 0]; const V4 inp_3_1_2 = inp[3][local_xy.y + 2][local_xy.x + 1]; const V4 inp_3_2_2 = inp[3][local_xy.y + 2][local_xy.x + 2]; result0 += M4(-0.03845043, 0.008121379, -0.04463431, -0.021435143, -0.0154112475, 0.008068699, 0.005275401, -0.008412226, 0.011113799, -0.0042059543, 0.021885974, 0.0046847453, -0.097053245, -0.07207643, -0.016230254, -0.057805564) * inp_3_0_0; result0 += M4(0.020068355, -0.059013963, 0.042093556, 0.007965266, 0.006641936, 0.0053823865, -0.04869122, -0.023604743, -0.05277748, -0.08194451, -0.059982788, -0.052345853, -0.033831976, -0.07052958, 0.027305871, 0.022747202) * inp_3_1_0; result0 += M4(-0.03663662, 0.027668301, -0.005128969, 0.04180462, -0.07902732, 0.07694945, -0.037715025, 0.025260974, 0.017048629, 0.05412875, 0.029637456, 0.04848225, -0.055232473, -0.090560995, -0.052049804, -0.053691596) * inp_3_2_0; result0 += M4(-0.041993283, -0.06591007, 0.06993194, -0.014823973, -0.029537756, 0.02038244, -0.05351582, 0.07256843, 0.01772454, -0.04179623, 0.019432299, 0.023107331, -0.03684704, 0.012458426, -0.06536513, 0.03873883) * inp_3_0_1; result0 += M4(0.19751841, 0.2748706, -0.24543872, -0.079816476, 0.11848018, -0.26794544, 0.11034708, -0.16498065, -0.51390487, -0.41391793, -0.39137235, -0.3589357, 0.37141094, 0.3205968, 0.35281453, 0.25800362) * inp_3_1_1; result0 += M4(0.03477724, 0.005542787, 0.0653558, -0.0044532754, -0.06219415, 0.13614522, -0.113096826, 0.04641227, 0.047544006, -0.011667869, 0.0035703634, 0.006406095, -0.10998655, -0.14912339, -0.102509305, -0.1656886) * inp_3_2_1; result0 += M4(-0.015407814, -0.016495913, -0.036472354, -0.009568684, -0.0074188295, 0.021021923, 0.011698857, 0.019404247, 0.09597954, 0.059281915, 0.2022051, 
0.05709749, -0.034792032, 0.014734844, -0.041519232, 0.02060852) * inp_3_0_2; result0 += M4(-0.026737554, -0.05625069, 0.069094196, 0.013394634, 0.06341689, -0.005378102, 0.101725005, -0.05227663, 0.18010192, 0.17765583, 0.042139824, 0.15793523, 0.14893357, 0.057833694, 0.09549699, -0.004848631) * inp_3_1_2; result0 += M4(-0.015824724, -0.022639401, -0.011159335, -0.0022217152, 0.0025551994, 0.0073573054, 0.011508081, 0.09528377, 0.09761966, 0.17366165, 0.11282759, 0.1593583, -0.014511227, -0.007068555, -0.021812135, -0.03897337) * inp_3_2_2; const ivec2 output_base = ivec2(gl_GlobalInvocationID) * ivec2(1, 1); imageStore(out_image, output_base + ivec2(0, 0), result0); }

//!DESC ArtCNN C4F16 (Depth-To-Space)
//!COMPUTE 12 16 12 16
//!HOOK LUMA
//!BIND conv2d_6
//!WIDTH LUMA.w 2.0 *
//!HEIGHT LUMA.h 2.0 *
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w / 1.3 > OUTPUT.h LUMA.h / 1.3 > *

#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
# define V4 f16vec4
# define M4 f16mat4
# define F float16_t
#else
# define V4 vec4
# define M4 mat4
# define F float
#endif

// Depth-to-space pass: every invocation writes one output pixel whose value
// is a single channel of the conv2d_6 texel it falls into. The channel is
// chosen by which quadrant of that texel the output pixel occupies
// (row-major: index = 2 * row + column).
void hook() {
    // Fractional position of this pixel inside the conv2d_6 texel, in [0, 1).
    const vec2 texel_frac = fract(conv2d_6_pos * conv2d_6_size);

    // Scale by 2 and truncate: each axis becomes 0 or 1, i.e. the quadrant.
    const ivec2 quadrant = ivec2(texel_frac * vec2(2.0));

    // Shift the sampling coordinate by (0.5 - frac) texels.
    // NOTE(review): this lands on the texel centre provided conv2d_6_pt is
    // 1 / conv2d_6_size, as the host is expected to define it - confirm.
    const vec2 sample_pos = (vec2(0.5) - texel_frac) * conv2d_6_pt + conv2d_6_pos;
    const vec4 src_texel = conv2d_6_tex(sample_pos);

    // Only the first component carries data; the others stay at 0, 0, 1.
    vec4 pixel = vec4(0.0, 0.0, 0.0, 1.0);
    pixel.x = src_texel[quadrant.y * 2 + quadrant.x];

    imageStore(out_image, ivec2(gl_GlobalInvocationID), clamp(pixel, 0.0, 1.0));
}