/*
 * hb_gpu_paint_fragment_glsl:
 *
 * GLSL source text of the paint-renderer fragment shader, stored as a
 * sequence of adjacent C string literals that the compiler concatenates
 * into one NUL-terminated string. Each literal carries one shader line
 * and ends in "\n".
 *
 * The string is not a complete shader on its own. It calls
 * hb_gpu_fetch(), _hb_gpu_slug() and uses HB_GPU_INV_UNITS, none of
 * which are defined here; the shader's own header comment says the
 * shared fragment helpers (hb-gpu-fragment.glsl) and the draw-renderer
 * fragment helpers (hb-gpu-draw-fragment.glsl) must be prepended.
 *
 * Functions defined by the embedded source:
 *   _hb_gpu_stop_color      fetch one gradient stop (2 texels per stop)
 *   _hb_gpu_extend_t        apply extend mode: 1 = repeat, 2 = reflect,
 *                           anything else = pad (clamp to [0, 1])
 *   _hb_gpu_eval_stops      walk the stops, interpolating premultiplied
 *   _hb_gpu_apply_minv      apply a 2x2 matrix stored as Q10 integers
 *   _hb_gpu_sample_linear   linear gradient sampler
 *   _hb_gpu_sample_radial   two-circle radial gradient sampler
 *   _hb_gpu_sample_sweep    sweep gradient sampler
 *   _hb_gpu_composite       composite two premultiplied layers by mode
 *   _hb_gpu_slug_clipped    _hb_gpu_slug() behind an extents early-out
 *   _hb_gpu_layer_coverage  product of up to three clip coverages
 *   hb_gpu_paint            entry point; walks the op stream and returns
 *                           premultiplied RGBA, writing the maximum
 *                           per-layer coverage to its `coverage` output
 *
 * Op types handled by hb_gpu_paint(): 0 = LAYER_SOLID,
 * 1 = LAYER_GRADIENT, 2 = PUSH_GROUP, 3 = POP_GROUP. Any other op type
 * ends the walk. Layer ops advance the cursor by 3 texels, group ops
 * by 1.
 *
 * NOTE(review): the group stack holds HB_GPU_PAINT_GROUP_DEPTH (4)
 * entries. A PUSH_GROUP at full depth still clears the accumulator but
 * saves nothing, so the next POP_GROUP composites against an outer
 * frame. Confirm the encoder never nests deeper than 4.
 *
 * NOTE(review): in _hb_gpu_sample_radial the locals `float t1` and
 * `float t2` (the quadratic roots) shadow the outer `ivec4 t1` texel.
 * r0 and r1 are read before the inner scope, so results are unaffected,
 * but a rename would be clearer.
 *
 * NOTE(review): this looks like generated output of a .glsl file; if
 * so, change the shader source rather than this string. TODO confirm.
 */
static const char *hb_gpu_paint_fragment_glsl = "/*\n" " * Copyright (C) 2026 Behdad Esfahbod\n" " *\n" " * This is part of HarfBuzz, a text shaping library.\n" " *\n" " * Permission is hereby granted, without written agreement and without\n" " * license or royalty fees, to use, copy, modify, and distribute this\n" " * software and its documentation for any purpose, provided that the\n" " * above copyright notice and the following two paragraphs appear in\n" " * all copies of this software.\n" " *\n" " * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR\n" " * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES\n" " * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN\n" " * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH\n" " * DAMAGE.\n" " *\n" " * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,\n" " * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\n" " * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS\n" " * ON AN \"AS IS\" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO\n" " * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.\n" " */\n" "\n" "\n" "/* Paint-renderer fragment shader.\n" " *\n" " * Assumes the shared fragment helpers (hb-gpu-fragment.glsl) and\n" " * the draw-renderer fragment helpers (hb-gpu-draw-fragment.glsl)\n" " * are prepended to this source. The draw helper provides\n" " * hb_gpu_draw() which this interpreter calls to compute clip-glyph\n" " * coverage.\n" " */\n" "\n" "\n" "/* Fetch the i'th stop of a gradient color line starting at @stops_base\n" " * (2 texels per stop). Resolves is_foreground to @foreground. 
*/\n" "vec4 _hb_gpu_stop_color (int stops_base, int i, vec4 foreground, out float offset)\n" "{\n" " ivec4 a = hb_gpu_fetch (stops_base + i * 2);\n" " offset = float (a.r) / 32767.0;\n" " ivec4 b = hb_gpu_fetch (stops_base + i * 2 + 1);\n" " if ((a.g & 1) != 0)\n" " return vec4 (foreground.rgb, foreground.a * (float (b.a) / 32767.0));\n" " return vec4 (b) / 32767.0;\n" "}\n" "\n" "/* Apply the color-line extend mode to a projected `t` value. */\n" "float _hb_gpu_extend_t (float t, int extend)\n" "{\n" " if (extend == 1) { /* HB_PAINT_EXTEND_REPEAT */\n" " return t - floor (t);\n" " } else if (extend == 2) { /* HB_PAINT_EXTEND_REFLECT */\n" " float u = t - 2.0 * floor (t * 0.5);\n" " return u > 1.0 ? 2.0 - u : u;\n" " }\n" " return clamp (t, 0.0, 1.0); /* PAD (default) */\n" "}\n" "\n" "/* Walk stops starting at @stops_base and return the sampled color\n" " * at @t. Same logic reused by all gradient subtypes. */\n" "vec4 _hb_gpu_eval_stops (int stops_base, int stop_count, float t, vec4 foreground)\n" "{\n" " float off_prev;\n" " vec4 col_prev = _hb_gpu_stop_color (stops_base, 0, foreground, off_prev);\n" " if (t <= off_prev)\n" " return col_prev;\n" " for (int i = 1; i < stop_count; i++)\n" " {\n" " float off;\n" " vec4 col = _hb_gpu_stop_color (stops_base, i, foreground, off);\n" " if (t <= off)\n" " {\n" " float span = off - off_prev;\n" " float f = span > 1e-6 ? (t - off_prev) / span : 0.0;\n" " /* Interpolate in premultiplied space per OpenType COLR spec. */\n" " vec4 p0 = vec4 (col_prev.rgb * col_prev.a, col_prev.a);\n" " vec4 p1 = vec4 (col.rgb * col.a, col.a);\n" " vec4 pm = mix (p0, p1, f);\n" " return pm.a > 1e-6 ? vec4 (pm.rgb / pm.a, pm.a) : vec4 (0.0);\n" " }\n" " col_prev = col;\n" " off_prev = off;\n" " }\n" " return col_prev;\n" "}\n" "\n" "/* Apply the stored 2x2 M^-1 (row-major i16 Q10) to @v. Scaling\n" " * renderCoord deltas back into canonical gradient space. 
*/\n" "vec2 _hb_gpu_apply_minv (ivec4 m, vec2 v)\n" "{\n" " vec4 mf = vec4 (m) * (1.0 / 1024.0);\n" " return vec2 (mf.x * v.x + mf.y * v.y,\n" " mf.z * v.x + mf.w * v.y);\n" "}\n" "\n" "/* Sample a linear gradient whose param blob starts at @grad_base:\n" " * texel 0: (p0_rendered.x, p0_rendered.y, d_canonical.x, d_canonical.y)\n" " * texel 1: L^-1 as i16 Q10 (row-major)\n" " * texels 2..: stops (2 texels each)\n" " * Evaluate t in untransformed space. */\n" "vec4 _hb_gpu_sample_linear (vec2 renderCoord, int grad_base,\n" " int stop_count, int extend, vec4 foreground)\n" "{\n" " ivec4 t0 = hb_gpu_fetch (grad_base);\n" " ivec4 m = hb_gpu_fetch (grad_base + 1);\n" " vec2 p0_r = vec2 (float (t0.r), float (t0.g));\n" " vec2 d = vec2 (float (t0.b), float (t0.a));\n" " float denom = dot (d, d);\n" " if (denom < 1e-6) return vec4 (0.0);\n" " vec2 p = _hb_gpu_apply_minv (m, renderCoord - p0_r);\n" " float t = dot (p, d) / denom;\n" " t = _hb_gpu_extend_t (t, extend);\n" "\n" " return _hb_gpu_eval_stops (grad_base + 2, stop_count, t, foreground);\n" "}\n" "\n" "/* Sample a two-circle radial gradient whose param blob starts at\n" " * @grad_base:\n" " * texel 0: (c0_rendered.x, c0_rendered.y, d_canonical.x, d_canonical.y)\n" " * d = c1 - c0 in untransformed space\n" " * texel 1: (r0, r1, _, _) in untransformed font units\n" " * texel 2: L^-1 as i16 Q10 (row-major)\n" " * texels 3..: stops (2 texels each)\n" " * Solves |p - t*cd|^2 = (r0 + t*(r1-r0))^2 with p in untransformed\n" " * space, so non-uniform scale / shear on the transform becomes a\n" " * proper ellipse-in-rendered-space instead of a scalar-fudge. 
*/\n" "vec4 _hb_gpu_sample_radial (vec2 renderCoord, int grad_base,\n" " int stop_count, int extend, vec4 foreground)\n" "{\n" " ivec4 t0 = hb_gpu_fetch (grad_base);\n" " ivec4 t1 = hb_gpu_fetch (grad_base + 1);\n" " ivec4 m = hb_gpu_fetch (grad_base + 2);\n" " vec2 c0_r = vec2 (float (t0.r), float (t0.g));\n" " vec2 cd = vec2 (float (t0.b), float (t0.a));\n" " float r0 = float (t1.r);\n" " float r1 = float (t1.g);\n" "\n" " float dr = r1 - r0;\n" " vec2 p = _hb_gpu_apply_minv (m, renderCoord - c0_r);\n" "\n" " float A = dot (cd, cd) - dr * dr;\n" " float B = -2.0 * (dot (p, cd) + r0 * dr);\n" " float C = dot (p, p) - r0 * r0;\n" "\n" " float t;\n" " if (abs (A) > 1e-6)\n" " {\n" " float disc = B * B - 4.0 * A * C;\n" " if (disc < 0.0) return vec4 (0.0);\n" " float sq = sqrt (disc);\n" " /* Prefer the larger root; fall back to the smaller if the\n" " * larger gives a negative interpolated radius. */\n" " float t1 = (-B + sq) / (2.0 * A);\n" " float t2 = (-B - sq) / (2.0 * A);\n" " t = (r0 + t1 * dr >= 0.0) ? 
t1 : t2;\n" " }\n" " else\n" " {\n" " if (abs (B) < 1e-6) return vec4 (0.0);\n" " t = -C / B;\n" " }\n" "\n" " t = _hb_gpu_extend_t (t, extend);\n" " return _hb_gpu_eval_stops (grad_base + 3, stop_count, t, foreground);\n" "}\n" "\n" "/* Sample a sweep gradient whose param blob starts at @grad_base:\n" " * texel 0: (center_rendered.x, center_rendered.y, start_q14, end_q14)\n" " * start/end are Q14 fractions of pi in untransformed space\n" " * texel 1: L^-1 as i16 Q10 (row-major)\n" " * texels 2..: stops (2 texels each) */\n" "vec4 _hb_gpu_sample_sweep (vec2 renderCoord, int grad_base,\n" " int stop_count, int extend, vec4 foreground)\n" "{\n" " ivec4 t0 = hb_gpu_fetch (grad_base);\n" " ivec4 m = hb_gpu_fetch (grad_base + 1);\n" " vec2 c_r = vec2 (float (t0.r), float (t0.g));\n" " float a0 = float (t0.b) / 16384.0; /* fraction of pi */\n" " float a1 = float (t0.a) / 16384.0;\n" " float span = a1 - a0;\n" " if (abs (span) < 1e-6) return vec4 (0.0);\n" "\n" " vec2 p = _hb_gpu_apply_minv (m, renderCoord - c_r);\n" " /* atan2 returns (-pi, pi]; normalize to [0, 2) fractions of pi. */\n" " float ang = atan (p.y, p.x) / 3.14159265358979;\n" " if (ang < 0.0) ang += 2.0;\n" " float t = (ang - a0) / span;\n" " t = _hb_gpu_extend_t (t, extend);\n" " return _hb_gpu_eval_stops (grad_base + 2, stop_count, t, foreground);\n" "}\n" "\n" "/* Composite two premultiplied RGBA layers using one of the COLRv1\n" " * compositing modes. Unsupported modes fall back to SRC_OVER.\n" " * Values match hb_paint_composite_mode_t. */\n" "vec4 _hb_gpu_composite (vec4 src, vec4 dst, int mode)\n" "{\n" " vec4 r = src + dst * (1.0 - src.a); /* SRC_OVER default */\n" "\n" " /* Approximate unsupported COLRv1 modes with the nearest Porter-Duff\n" " * mode we do implement. Better a recognizable rendering than a\n" " * silent SRC_OVER fallback. DIFFERENCE / EXCLUSION / HSL_* are\n" " * not similar enough to anything we have, so they still fall\n" " * through to SRC_OVER below. 
*/\n" " if (mode == 14 || mode == 18 || mode == 19) mode = 23; /* OVERLAY / COLOR_BURN / HARD_LIGHT -> MULTIPLY */\n" " else if (mode == 17 || mode == 20) mode = 13; /* COLOR_DODGE / SOFT_LIGHT -> SCREEN */\n" "\n" " if (mode == 0) r = vec4 (0.0); /* CLEAR */\n" " else if (mode == 1) r = src; /* SRC */\n" " else if (mode == 2) r = dst; /* DST */\n" " else if (mode == 4) r = dst + src * (1.0 - dst.a); /* DST_OVER */\n" " else if (mode == 5) r = src * dst.a; /* SRC_IN */\n" " else if (mode == 6) r = dst * src.a; /* DST_IN */\n" " else if (mode == 7) r = src * (1.0 - dst.a); /* SRC_OUT */\n" " else if (mode == 8) r = dst * (1.0 - src.a); /* DST_OUT */\n" " else if (mode == 9) /* SRC_ATOP */\n" " r = src * dst.a + dst * (1.0 - src.a);\n" " else if (mode == 10) /* DST_ATOP */\n" " r = dst * src.a + src * (1.0 - dst.a);\n" " else if (mode == 11) /* XOR */\n" " r = src * (1.0 - dst.a) + dst * (1.0 - src.a);\n" " else if (mode == 12) /* PLUS */\n" " r = min (src + dst, vec4 (1.0));\n" " else if (mode == 13) { /* SCREEN (premul) */\n" " r.rgb = src.rgb + dst.rgb - src.rgb * dst.rgb;\n" " r.a = src.a + dst.a - src.a * dst.a;\n" " }\n" " else if (mode == 15) { /* DARKEN */\n" " r.rgb = min (src.rgb * dst.a, dst.rgb * src.a)\n" " + src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a);\n" " r.a = src.a + dst.a - src.a * dst.a;\n" " }\n" " else if (mode == 16) { /* LIGHTEN */\n" " r.rgb = max (src.rgb * dst.a, dst.rgb * src.a)\n" " + src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a);\n" " r.a = src.a + dst.a - src.a * dst.a;\n" " }\n" " else if (mode == 23) { /* MULTIPLY (premul) */\n" " r.rgb = src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a)\n" " + src.rgb * dst.rgb;\n" " r.a = src.a + dst.a - src.a * dst.a;\n" " }\n" " /* SRC_OVER (3) and DIFFERENCE / EXCLUSION / HSL_* (21, 22, 24-27)\n" " * fall through to the SRC_OVER default. */\n" "\n" " return r;\n" "}\n" "\n" "/* Wrap _hb_gpu_slug with a sub-glyph extents bail-out. 
Many\n" " * paint layers cover a small region of the outer glyph quad; for\n" " * fragments outside the layer's bbox (with an AA + MSAA-spread\n" " * margin) the slug coverage is exactly 0, so we can skip the\n" " * band/curve walk entirely. */\n" "float _hb_gpu_slug_clipped (vec2 renderCoord, vec2 pixelsPerEm, uint glyphLoc_)\n" "{\n" " ivec4 header0 = hb_gpu_fetch (int (glyphLoc_));\n" " vec4 ext = vec4 (header0) * HB_GPU_INV_UNITS;\n" " vec2 margin = 2.0 / pixelsPerEm;\n" " if (any (lessThan (renderCoord, ext.xy - margin)) ||\n" " any (greaterThan (renderCoord, ext.zw + margin)))\n" " return 0.0;\n" " return _hb_gpu_slug (renderCoord, pixelsPerEm, glyphLoc_);\n" "}\n" "\n" "/* Combine slug coverages from all clip outlines on the current\n" " * layer. Factored out of LAYER_SOLID and LAYER_GRADIENT so the\n" " * shader has one set of inlined slug walks instead of two. flags\n" " * bits: 0x100 = HAS_CLIP2; 0x200 = HAS_CLIP3 (HAS_CLIP3 implies\n" " * HAS_CLIP2). */\n" "float _hb_gpu_layer_coverage (vec2 renderCoord, vec2 pixelsPerEm,\n" " int base, int flags,\n" " int clip1_payload, int clip2_payload, int clip3_payload)\n" "{\n" " float cov = _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,\n" " uint (base + clip1_payload));\n" " if ((flags & 0x100) != 0)\n" " {\n" " cov *= _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,\n" " uint (base + clip2_payload));\n" " if ((flags & 0x200) != 0)\n" " cov *= _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,\n" " uint (base + clip3_payload));\n" " }\n" " return cov;\n" "}\n" "\n" "/* Walks the paint blob's flat op stream and returns a\n" " * premultiplied RGBA coverage value for the current fragment.\n" " *\n" " * glyphLoc: atlas texel offset of the paint-blob header.\n" " * foreground: caller-supplied foreground color, used when an op\n" " * sets the is_foreground flag.\n" " */\n" "#define HB_GPU_PAINT_GROUP_DEPTH 4\n" "\n" "vec4 hb_gpu_paint (vec2 renderCoord, uint glyphLoc, vec4 foreground,\n" " out float coverage)\n" "{\n" " 
/* fwidth once, at uniform control flow: every per-layer\n" " * coverage sample below uses this pre-computed pixelsPerEm via\n" " * _hb_gpu_slug. */\n" " vec2 pixelsPerEm = 1.0 / fwidth (renderCoord);\n" "\n" " int base = int (glyphLoc);\n" " ivec4 h0 = hb_gpu_fetch (base); /* (num_ops, _, _, _) */\n" " ivec4 h2 = hb_gpu_fetch (base + 2); /* (ops_offset, _, _, _) */\n" " int num_ops = h0.r;\n" " int cursor = base + h2.r;\n" "\n" " vec4 acc = vec4 (0.0);\n" " vec4 group_stack[HB_GPU_PAINT_GROUP_DEPTH];\n" " int sp = 0;\n" " coverage = 0.0;\n" "\n" " for (int i = 0; i < num_ops; i++)\n" " {\n" " ivec4 op = hb_gpu_fetch (cursor);\n" " int op_type = op.r;\n" " int aux = op.g;\n" " int payload = (op.b << 16) | (op.a & 0xffff);\n" "\n" " if (op_type == 0) /* LAYER_SOLID */\n" " {\n" " /* texel 1: (clip2_hi, clip2_lo, clip3_hi, clip3_lo) -- valid\n" " * per HAS_CLIP2 / HAS_CLIP3 flag bits.\n" " * texel 2: RGBA as signed Q15. */\n" " ivec4 op2 = hb_gpu_fetch (cursor + 1);\n" " int clip2_payload = (op2.r << 16) | (op2.g & 0xffff);\n" " int clip3_payload = (op2.b << 16) | (op2.a & 0xffff);\n" " ivec4 ct = hb_gpu_fetch (cursor + 2);\n" " vec4 col = ((aux & 1) != 0)\n" " ? 
vec4 (foreground.rgb, foreground.a * (float (ct.a) / 32767.0))\n" " : vec4 (ct) / 32767.0;\n" "\n" " float cov = _hb_gpu_layer_coverage (renderCoord, pixelsPerEm,\n" " base, aux,\n" " payload, clip2_payload, clip3_payload);\n" " coverage = max (coverage, cov);\n" " vec4 src = vec4 (col.rgb * col.a, col.a) * cov;\n" " acc = src + acc * (1.0 - src.a);\n" "\n" " cursor += 3;\n" " }\n" " else if (op_type == 1) /* LAYER_GRADIENT */\n" " {\n" " /* texel 1: (clip2_hi, clip2_lo, clip3_hi, clip3_lo) -- valid\n" " * per HAS_CLIP2 / HAS_CLIP3 flag bits.\n" " * texel 2: (grad_payload_hi, grad_payload_lo, extend, stop_count) */\n" " ivec4 op2 = hb_gpu_fetch (cursor + 1);\n" " int clip2_payload = (op2.r << 16) | (op2.g & 0xffff);\n" " int clip3_payload = (op2.b << 16) | (op2.a & 0xffff);\n" " ivec4 op3 = hb_gpu_fetch (cursor + 2);\n" " int grad_payload = (op3.r << 16) | (op3.g & 0xffff);\n" " int extend = op3.b;\n" " int stop_count = op3.a;\n" " int subtype = aux & 0xff;\n" "\n" " vec4 col = vec4 (0.0);\n" " if (subtype == 0) /* linear */\n" " col = _hb_gpu_sample_linear (renderCoord,\n" " base + grad_payload,\n" " stop_count, extend, foreground);\n" " else if (subtype == 1) /* radial */\n" " col = _hb_gpu_sample_radial (renderCoord,\n" " base + grad_payload,\n" " stop_count, extend, foreground);\n" " else if (subtype == 2) /* sweep */\n" " col = _hb_gpu_sample_sweep (renderCoord,\n" " base + grad_payload,\n" " stop_count, extend, foreground);\n" "\n" " float cov = _hb_gpu_layer_coverage (renderCoord, pixelsPerEm,\n" " base, aux,\n" " payload, clip2_payload, clip3_payload);\n" " coverage = max (coverage, cov);\n" " vec4 src = vec4 (col.rgb * col.a, col.a) * cov;\n" " acc = src + acc * (1.0 - src.a);\n" "\n" " cursor += 3;\n" " }\n" " else if (op_type == 2) /* PUSH_GROUP */\n" " {\n" " if (sp < HB_GPU_PAINT_GROUP_DEPTH) {\n" " group_stack[sp] = acc;\n" " sp++;\n" " }\n" " acc = vec4 (0.0);\n" " cursor += 1;\n" " }\n" " else if (op_type == 3) /* POP_GROUP */\n" " {\n" " if (sp > 
0) {\n" " sp--;\n" " vec4 src = acc;\n" " vec4 dst = group_stack[sp];\n" " acc = _hb_gpu_composite (src, dst, aux);\n" " }\n" " cursor += 1;\n" " }\n" " else\n" " {\n" " break;\n" " }\n" " }\n" "\n" " return acc;\n" "}\n" ;