/* Paint-renderer fragment shader (WGSL), embedded as a C string so it can be
 * concatenated at runtime after the shared helpers it depends on
 * (hb-gpu-fragment.wgsl and hb-gpu-draw-fragment.wgsl, which provide
 * hb_gpu_fetch, _hb_gpu_slug and HB_GPU_INV_UNITS).
 *
 * NOTE(review): the atlas parameter type `ptr<storage, array<vec4i>, read>`
 * and the Q15/Q10 texel layouts are reconstructed from how the components are
 * consumed here (signed-integer swizzles, 16-bit packing); they must match the
 * hb_gpu_fetch declaration in hb-gpu-fragment.wgsl — confirm against that file. */
static const char *hb_gpu_paint_fragment_wgsl =
  "/*\n"
  " * Copyright (C) 2026 Behdad Esfahbod\n"
  " *\n"
  " * This is part of HarfBuzz, a text shaping library.\n"
  " *\n"
  " * Permission is hereby granted, without written agreement and without\n"
  " * license or royalty fees, to use, copy, modify, and distribute this\n"
  " * software and its documentation for any purpose, provided that the\n"
  " * above copyright notice and the following two paragraphs appear in\n"
  " * all copies of this software.\n"
  " *\n"
  " * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR\n"
  " * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES\n"
  " * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN\n"
  " * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH\n"
  " * DAMAGE.\n"
  " *\n"
  " * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,\n"
  " * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\n"
  " * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS\n"
  " * ON AN \"AS IS\" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO\n"
  " * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.\n"
  " */\n"
  "\n"
  "\n"
  "/* Paint-renderer fragment shader (WGSL).\n"
  " *\n"
  " * Assumes the shared fragment helpers (hb-gpu-fragment.wgsl) and\n"
  " * the draw-renderer fragment helpers (hb-gpu-draw-fragment.wgsl)\n"
  " * are prepended to this source.\n"
  " *\n"
  " * atlas is passed as an explicit storage-buffer pointer parameter,\n"
  " * matching WGSL's scoping rules.\n"
  " */\n"
  "\n"
  "\n"
  "/* Decode color-stop i: texel a = (offset Q15, flags, _, _); texel b =\n"
  " * RGBA Q15, or (_, _, _, alpha Q15) modulating the foreground when\n"
  " * flag bit 0 is set.  Writes the stop offset through `offset`. */\n"
  "fn _hb_gpu_stop_color (hb_gpu_atlas: ptr<storage, array<vec4i>, read>,\n"
  " stops_base: i32, i: i32, foreground: vec4f,\n"
  " offset: ptr<function, f32>) -> vec4f\n"
  "{\n"
  " let a = hb_gpu_fetch (hb_gpu_atlas, stops_base + i * 2);\n"
  " *offset = f32 (a.r) / 32767.0;\n"
  " let b = hb_gpu_fetch (hb_gpu_atlas, stops_base + i * 2 + 1);\n"
  " if ((a.g & 1) != 0) {\n"
  " return vec4f (foreground.rgb, foreground.a * (f32 (b.a) / 32767.0));\n"
  " }\n"
  " return vec4f (b) / 32767.0;\n"
  "}\n"
  "\n"
  "/* Map gradient parameter t per extend mode: 1 = repeat, 2 = reflect,\n"
  " * otherwise pad (clamp). */\n"
  "fn _hb_gpu_extend_t (t: f32, extend: i32) -> f32\n"
  "{\n"
  " if (extend == 1) { return t - floor (t); }\n"
  " if (extend == 2) {\n"
  " let u = t - 2.0 * floor (t * 0.5);\n"
  " if (u > 1.0) { return 2.0 - u; }\n"
  " return u;\n"
  " }\n"
  " return clamp (t, 0.0, 1.0);\n"
  "}\n"
  "\n"
  "/* Interpolate the stop ramp at t; interpolation happens in premultiplied\n"
  " * space, result is unpremultiplied. */\n"
  "fn _hb_gpu_eval_stops (hb_gpu_atlas: ptr<storage, array<vec4i>, read>,\n"
  " stops_base: i32, stop_count: i32,\n"
  " t: f32, foreground: vec4f) -> vec4f\n"
  "{\n"
  " var off_prev: f32;\n"
  " var col_prev = _hb_gpu_stop_color (hb_gpu_atlas, stops_base, 0, foreground, &off_prev);\n"
  " if (t <= off_prev) { return col_prev; }\n"
  " for (var i: i32 = 1; i < stop_count; i = i + 1)\n"
  " {\n"
  " var off: f32;\n"
  " let col = _hb_gpu_stop_color (hb_gpu_atlas, stops_base, i, foreground, &off);\n"
  " if (t <= off)\n"
  " {\n"
  " let span = off - off_prev;\n"
  " var f: f32 = 0.0;\n"
  " if (span > 1e-6) { f = (t - off_prev) / span; }\n"
  " let p0 = vec4f (col_prev.rgb * col_prev.a, col_prev.a);\n"
  " let p1 = vec4f (col.rgb * col.a, col.a);\n"
  " let pm = mix (p0, p1, f);\n"
  " if (pm.a > 1e-6) { return vec4f (pm.rgb / pm.a, pm.a); }\n"
  " return vec4f (0.0);\n"
  " }\n"
  " col_prev = col;\n"
  " off_prev = off;\n"
  " }\n"
  " return col_prev;\n"
  "}\n"
  "\n"
  "/* Apply the stored 2x2 M^-1 (row-major i16 Q10) to a vector. */\n"
  "fn _hb_gpu_apply_minv (m: vec4i, v: vec2f) -> vec2f\n"
  "{\n"
  " let mf = vec4f (m) * (1.0 / 1024.0);\n"
  " return vec2f (mf.x * v.x + mf.y * v.y,\n"
  " mf.z * v.x + mf.w * v.y);\n"
  "}\n"
  "\n"
  "fn _hb_gpu_sample_linear (renderCoord: vec2f, grad_base: i32,\n"
  " stop_count: i32, extend: i32, foreground: vec4f,\n"
  " hb_gpu_atlas: ptr<storage, array<vec4i>, read>) -> vec4f\n"
  "{\n"
  " let t0 = hb_gpu_fetch (hb_gpu_atlas, grad_base);\n"
  " let m = hb_gpu_fetch (hb_gpu_atlas, grad_base + 1);\n"
  " let p0_r = vec2f (f32 (t0.r), f32 (t0.g));\n"
  " let d = vec2f (f32 (t0.b), f32 (t0.a));\n"
  " let denom = dot (d, d);\n"
  " if (denom < 1e-6) { return vec4f (0.0); }\n"
  " let p = _hb_gpu_apply_minv (m, renderCoord - p0_r);\n"
  " var t = dot (p, d) / denom;\n"
  " t = _hb_gpu_extend_t (t, extend);\n"
  " return _hb_gpu_eval_stops (hb_gpu_atlas, grad_base + 2, stop_count, t, foreground);\n"
  "}\n"
  "\n"
  "fn _hb_gpu_sample_radial (renderCoord: vec2f, grad_base: i32,\n"
  " stop_count: i32, extend: i32, foreground: vec4f,\n"
  " hb_gpu_atlas: ptr<storage, array<vec4i>, read>) -> vec4f\n"
  "{\n"
  " let t0 = hb_gpu_fetch (hb_gpu_atlas, grad_base);\n"
  " let t1 = hb_gpu_fetch (hb_gpu_atlas, grad_base + 1);\n"
  " let m = hb_gpu_fetch (hb_gpu_atlas, grad_base + 2);\n"
  " let c0_r = vec2f (f32 (t0.r), f32 (t0.g));\n"
  " let cd = vec2f (f32 (t0.b), f32 (t0.a));\n"
  " let r0 = f32 (t1.r);\n"
  " let r1 = f32 (t1.g);\n"
  "\n"
  " let dr = r1 - r0;\n"
  " let p = _hb_gpu_apply_minv (m, renderCoord - c0_r);\n"
  "\n"
  " /* Solve |p - t*cd| = r0 + t*dr for t (two-circle radial gradient). */\n"
  " let A = dot (cd, cd) - dr * dr;\n"
  " let B = -2.0 * (dot (p, cd) + r0 * dr);\n"
  " let C = dot (p, p) - r0 * r0;\n"
  "\n"
  " var t: f32;\n"
  " if (abs (A) > 1e-6)\n"
  " {\n"
  " let disc = B * B - 4.0 * A * C;\n"
  " if (disc < 0.0) { return vec4f (0.0); }\n"
  " let sq = sqrt (disc);\n"
  " let t1r = (-B + sq) / (2.0 * A);\n"
  " let t2r = (-B - sq) / (2.0 * A);\n"
  " /* Prefer the root with a non-negative interpolated radius. */\n"
  " if (r0 + t1r * dr >= 0.0) { t = t1r; } else { t = t2r; }\n"
  " }\n"
  " else\n"
  " {\n"
  " if (abs (B) < 1e-6) { return vec4f (0.0); }\n"
  " t = -C / B;\n"
  " }\n"
  " t = _hb_gpu_extend_t (t, extend);\n"
  " return _hb_gpu_eval_stops (hb_gpu_atlas, grad_base + 3, stop_count, t, foreground);\n"
  "}\n"
  "\n"
  "fn _hb_gpu_sample_sweep (renderCoord: vec2f, grad_base: i32,\n"
  " stop_count: i32, extend: i32, foreground: vec4f,\n"
  " hb_gpu_atlas: ptr<storage, array<vec4i>, read>) -> vec4f\n"
  "{\n"
  " let t0 = hb_gpu_fetch (hb_gpu_atlas, grad_base);\n"
  " let m = hb_gpu_fetch (hb_gpu_atlas, grad_base + 1);\n"
  " let c_r = vec2f (f32 (t0.r), f32 (t0.g));\n"
  " /* Angles stored as Q? fixed point in units of pi (16384 == pi). */\n"
  " let a0 = f32 (t0.b) / 16384.0;\n"
  " let a1 = f32 (t0.a) / 16384.0;\n"
  " let span = a1 - a0;\n"
  " if (abs (span) < 1e-6) { return vec4f (0.0); }\n"
  "\n"
  " let p = _hb_gpu_apply_minv (m, renderCoord - c_r);\n"
  " var ang = atan2 (p.y, p.x) / 3.14159265358979;\n"
  " if (ang < 0.0) { ang = ang + 2.0; }\n"
  " var t = (ang - a0) / span;\n"
  " t = _hb_gpu_extend_t (t, extend);\n"
  " return _hb_gpu_eval_stops (hb_gpu_atlas, grad_base + 2, stop_count, t, foreground);\n"
  "}\n"
  "\n"
  "fn _hb_gpu_composite (src: vec4f, dst: vec4f, mode_in: i32) -> vec4f\n"
  "{\n"
  " var r = src + dst * (1.0 - src.a); /* SRC_OVER default */\n"
  "\n"
  " /* Mode numbers match hb_paint_composite_mode_t. Approximate\n"
  " * unsupported modes with the nearest Porter-Duff mode we do\n"
  " * implement; DIFFERENCE / EXCLUSION / HSL_* still fall through to\n"
  " * SRC_OVER below. */\n"
  " var mode = mode_in;\n"
  " if (mode == 14 || mode == 18 || mode == 19) { mode = 23; } /* OVERLAY / COLOR_BURN / HARD_LIGHT -> MULTIPLY */\n"
  " else if (mode == 17 || mode == 20) { mode = 13; } /* COLOR_DODGE / SOFT_LIGHT -> SCREEN */\n"
  "\n"
  " if (mode == 0) { r = vec4f (0.0); } /* CLEAR */\n"
  " else if (mode == 1) { r = src; } /* SRC */\n"
  " else if (mode == 2) { r = dst; } /* DST */\n"
  " else if (mode == 4) { r = dst + src * (1.0 - dst.a); } /* DST_OVER */\n"
  " else if (mode == 5) { r = src * dst.a; } /* SRC_IN */\n"
  " else if (mode == 6) { r = dst * src.a; } /* DST_IN */\n"
  " else if (mode == 7) { r = src * (1.0 - dst.a); } /* SRC_OUT */\n"
  " else if (mode == 8) { r = dst * (1.0 - src.a); } /* DST_OUT */\n"
  " else if (mode == 9) { r = src * dst.a + dst * (1.0 - src.a); } /* SRC_ATOP */\n"
  " else if (mode == 10) { r = dst * src.a + src * (1.0 - dst.a); } /* DST_ATOP */\n"
  " else if (mode == 11) { r = src * (1.0 - dst.a) + dst * (1.0 - src.a); } /* XOR */\n"
  " else if (mode == 12) { r = min (src + dst, vec4f (1.0)); } /* PLUS */\n"
  " else if (mode == 13) { /* SCREEN (premul) */\n"
  " r = vec4f (src.rgb + dst.rgb - src.rgb * dst.rgb,\n"
  " src.a + dst.a - src.a * dst.a);\n"
  " }\n"
  " else if (mode == 15) { /* DARKEN */\n"
  " r = vec4f (min (src.rgb * dst.a, dst.rgb * src.a)\n"
  " + src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a),\n"
  " src.a + dst.a - src.a * dst.a);\n"
  " }\n"
  " else if (mode == 16) { /* LIGHTEN */\n"
  " r = vec4f (max (src.rgb * dst.a, dst.rgb * src.a)\n"
  " + src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a),\n"
  " src.a + dst.a - src.a * dst.a);\n"
  " }\n"
  " else if (mode == 23) { /* MULTIPLY (premul) */\n"
  " r = vec4f (src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a)\n"
  " + src.rgb * dst.rgb,\n"
  " src.a + dst.a - src.a * dst.a);\n"
  " }\n"
  " /* SRC_OVER (3) and DIFFERENCE / EXCLUSION / HSL_* (21, 22, 24-27)\n"
  " * fall through to the SRC_OVER default. */\n"
  " return r;\n"
  "}\n"
  "\n"
  "/* Wrap _hb_gpu_slug with a sub-glyph extents bail-out. Many\n"
  " * paint layers cover a small region of the outer glyph quad; for\n"
  " * fragments outside the layer's bbox (with an AA + MSAA-spread\n"
  " * margin) the slug coverage is exactly 0, so we can skip the\n"
  " * band/curve walk entirely. */\n"
  "fn _hb_gpu_slug_clipped (renderCoord: vec2f, pixelsPerEm: vec2f, glyphLoc_: u32,\n"
  " hb_gpu_atlas: ptr<storage, array<vec4i>, read>) -> f32\n"
  "{\n"
  " let header0 = hb_gpu_fetch (hb_gpu_atlas, i32 (glyphLoc_));\n"
  " let ext = vec4f (header0) * HB_GPU_INV_UNITS;\n"
  " let margin = 2.0 / pixelsPerEm;\n"
  " if (any (renderCoord < ext.xy - margin) ||\n"
  " any (renderCoord > ext.zw + margin)) {\n"
  " return 0.0;\n"
  " }\n"
  " return _hb_gpu_slug (renderCoord, pixelsPerEm, glyphLoc_, hb_gpu_atlas);\n"
  "}\n"
  "\n"
  "/* Combine slug coverages from all clip outlines on the layer.\n"
  " * Factored so the shader has one set of inlined slug walks\n"
  " * instead of two (one per LAYER op type). flags bits: 0x100 =\n"
  " * HAS_CLIP2; 0x200 = HAS_CLIP3 (HAS_CLIP3 implies HAS_CLIP2). */\n"
  "fn _hb_gpu_layer_coverage (renderCoord: vec2f, pixelsPerEm: vec2f,\n"
  " base: i32, flags: i32,\n"
  " clip1_payload: i32, clip2_payload: i32, clip3_payload: i32,\n"
  " hb_gpu_atlas: ptr<storage, array<vec4i>, read>) -> f32\n"
  "{\n"
  " var cov = _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,\n"
  " u32 (base + clip1_payload), hb_gpu_atlas);\n"
  " if ((flags & 0x100) != 0) {\n"
  " cov = cov * _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,\n"
  " u32 (base + clip2_payload), hb_gpu_atlas);\n"
  " if ((flags & 0x200) != 0) {\n"
  " cov = cov * _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,\n"
  " u32 (base + clip3_payload), hb_gpu_atlas);\n"
  " }\n"
  " }\n"
  " return cov;\n"
  "}\n"
  "\n"
  "const HB_GPU_PAINT_GROUP_DEPTH: i32 = 4;\n"
  "\n"
  "fn hb_gpu_paint (renderCoord: vec2f, glyphLoc_: u32, foreground: vec4f,\n"
  " hb_gpu_atlas: ptr<storage, array<vec4i>, read>,\n"
  " coverage: ptr<function, f32>) -> vec4f\n"
  "{\n"
  " /* Compute pixelsPerEm once here at uniform control flow. WGSL\n"
  " * rejects fwidth inside a loop-conditional branch, so we call\n"
  " * _hb_gpu_slug (the MSAA-aware implementation that takes a\n"
  " * pre-computed pixelsPerEm) instead of the top-level\n"
  " * hb_gpu_draw() which would re-call fwidth. */\n"
  " let emsPerPixel = fwidth (renderCoord);\n"
  " let pixelsPerEm = 1.0 / emsPerPixel;\n"
  "\n"
  " let base = i32 (glyphLoc_);\n"
  " let h0 = hb_gpu_fetch (hb_gpu_atlas, base); // (num_ops, _, _, _)\n"
  " let h2 = hb_gpu_fetch (hb_gpu_atlas, base + 2); // (ops_offset, _, _, _)\n"
  " let num_ops = h0.r;\n"
  " var cursor = base + h2.r;\n"
  "\n"
  " var acc = vec4f (0.0);\n"
  " var group_stack: array<vec4f, HB_GPU_PAINT_GROUP_DEPTH>;\n"
  " var sp: i32 = 0;\n"
  " *coverage = 0.0;\n"
  "\n"
  " for (var i: i32 = 0; i < num_ops; i = i + 1)\n"
  " {\n"
  " let op = hb_gpu_fetch (hb_gpu_atlas, cursor);\n"
  " let op_type = op.r;\n"
  " let aux = op.g;\n"
  " let payload = (op.b << 16) | (op.a & 0xffff);\n"
  "\n"
  " if (op_type == 0) { // LAYER_SOLID\n"
  " // texel 1: (clip2_hi, clip2_lo, clip3_hi, clip3_lo) -- valid\n"
  " // per HAS_CLIP2 / HAS_CLIP3 flag bits.\n"
  " // texel 2: RGBA as signed Q15.\n"
  " let op2 = hb_gpu_fetch (hb_gpu_atlas, cursor + 1);\n"
  " let clip2_payload = (op2.r << 16) | (op2.g & 0xffff);\n"
  " let clip3_payload = (op2.b << 16) | (op2.a & 0xffff);\n"
  " let ct = hb_gpu_fetch (hb_gpu_atlas, cursor + 2);\n"
  " var col: vec4f;\n"
  " if ((aux & 1) != 0) {\n"
  " col = vec4f (foreground.rgb, foreground.a * (f32 (ct.a) / 32767.0));\n"
  " } else {\n"
  " col = vec4f (ct) / 32767.0;\n"
  " }\n"
  "\n"
  " let cov = _hb_gpu_layer_coverage (renderCoord, pixelsPerEm,\n"
  " base, aux,\n"
  " payload, clip2_payload, clip3_payload,\n"
  " hb_gpu_atlas);\n"
  " *coverage = max (*coverage, cov);\n"
  " let src = vec4f (col.rgb * col.a, col.a) * cov;\n"
  " acc = src + acc * (1.0 - src.a);\n"
  "\n"
  " cursor = cursor + 3;\n"
  " } else if (op_type == 1) { // LAYER_GRADIENT\n"
  " let op2 = hb_gpu_fetch (hb_gpu_atlas, cursor + 1);\n"
  " let clip2_payload = (op2.r << 16) | (op2.g & 0xffff);\n"
  " let clip3_payload = (op2.b << 16) | (op2.a & 0xffff);\n"
  " let op3 = hb_gpu_fetch (hb_gpu_atlas, cursor + 2);\n"
  " let grad_payload = (op3.r << 16) | (op3.g & 0xffff);\n"
  " let extend = op3.b;\n"
  " let stop_count = op3.a;\n"
  " let subtype = aux & 0xff;\n"
  "\n"
  " var col = vec4f (0.0);\n"
  " if (subtype == 0) {\n"
  " col = _hb_gpu_sample_linear (renderCoord,\n"
  " base + grad_payload,\n"
  " stop_count, extend, foreground,\n"
  " hb_gpu_atlas);\n"
  " } else if (subtype == 1) {\n"
  " col = _hb_gpu_sample_radial (renderCoord,\n"
  " base + grad_payload,\n"
  " stop_count, extend, foreground,\n"
  " hb_gpu_atlas);\n"
  " } else if (subtype == 2) {\n"
  " col = _hb_gpu_sample_sweep (renderCoord,\n"
  " base + grad_payload,\n"
  " stop_count, extend, foreground,\n"
  " hb_gpu_atlas);\n"
  " }\n"
  "\n"
  " let cov = _hb_gpu_layer_coverage (renderCoord, pixelsPerEm,\n"
  " base, aux,\n"
  " payload, clip2_payload, clip3_payload,\n"
  " hb_gpu_atlas);\n"
  " *coverage = max (*coverage, cov);\n"
  " let src = vec4f (col.rgb * col.a, col.a) * cov;\n"
  " acc = src + acc * (1.0 - src.a);\n"
  "\n"
  " cursor = cursor + 3;\n"
  " } else if (op_type == 2) { // PUSH_GROUP\n"
  " if (sp < HB_GPU_PAINT_GROUP_DEPTH) {\n"
  " group_stack[sp] = acc;\n"
  " sp = sp + 1;\n"
  " }\n"
  " acc = vec4f (0.0);\n"
  " cursor = cursor + 1;\n"
  " } else if (op_type == 3) { // POP_GROUP\n"
  " if (sp > 0) {\n"
  " sp = sp - 1;\n"
  " let src = acc;\n"
  " let dst = group_stack[sp];\n"
  " acc = _hb_gpu_composite (src, dst, aux);\n"
  " }\n"
  " cursor = cursor + 1;\n"
  " } else {\n"
  " break;\n"
  " }\n"
  " }\n"
  "\n"
  " return acc;\n"
  "}\n"
;