/*
 * Copyright (C) 2026 Behdad Esfahbod
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 */

/* Paint-renderer fragment shader (Metal).
 *
 * Assumes the shared fragment helpers (hb-gpu-fragment.msl) and
 * the draw-renderer fragment helpers (hb-gpu-draw-fragment.msl)
 * are prepended to this source.
 */

/* Fetch the i'th gradient stop (2 texels per stop), resolving is_foreground.
 *
 * Texel layout (per the decode below): texel 0 holds (offset_q15, flags, _, _)
 * where flag bit 0 selects the foreground color; texel 1 holds RGBA as
 * signed Q15.  The stop offset is written through `offset` as [0,1];
 * the return value is a straight-alpha color. */
static float4
_hb_gpu_stop_color (device const short4* hb_gpu_atlas,
		    int stops_base, int i,
		    float4 foreground,
		    thread float &offset)
{
  int4 a = int4 (hb_gpu_atlas[stops_base + i * 2]);
  offset = float (a.r) / 32767.0;
  int4 b = int4 (hb_gpu_atlas[stops_base + i * 2 + 1]);
  if ((a.g & 1) != 0)
    /* is_foreground: keep the foreground RGB, modulate its alpha by
     * the stop's Q15 alpha. */
    return float4 (foreground.rgb, foreground.a * (float (b.a) / 32767.0));
  return float4 (b) / 32767.0;
}

/* Map the gradient parameter t by extend mode:
 * 1 = repeat, 2 = reflect, anything else = pad (clamp).
 * Numbering matches hb_paint_extend_t. */
static float
_hb_gpu_extend_t (float t, int extend)
{
  if (extend == 1)
    return t - floor (t);
  if (extend == 2)
  {
    /* Fold t into [0,2), then mirror the upper half back onto [0,1]. */
    float u = t - 2.0 * floor (t * 0.5);
    return u > 1.0 ? 2.0 - u : u;
  }
  return clamp (t, 0.0, 1.0);
}

/* Evaluate the color-stop ramp at parameter t (already extend-mapped).
 *
 * Interpolation between adjacent stops is done in premultiplied space,
 * then un-premultiplied for the straight-alpha return value.  t below
 * the first stop pads to the first stop; t above the last pads to the
 * last.  A record with no stops yields transparent black. */
static float4
_hb_gpu_eval_stops (device const short4* hb_gpu_atlas,
		    int stops_base, int stop_count,
		    float t,
		    float4 foreground)
{
  /* Guard a degenerate/corrupt gradient record: the unconditional
   * fetch of stop 0 below would otherwise read texels outside the
   * stop array. */
  if (stop_count < 1)
    return float4 (0.0);

  float off_prev;
  float4 col_prev = _hb_gpu_stop_color (hb_gpu_atlas, stops_base, 0,
					foreground, off_prev);
  if (t <= off_prev)
    return col_prev;

  for (int i = 1; i < stop_count; i++)
  {
    float off;
    float4 col = _hb_gpu_stop_color (hb_gpu_atlas, stops_base, i,
				     foreground, off);
    if (t <= off)
    {
      float span = off - off_prev;
      /* Coincident stops: snap to the earlier stop's color. */
      float f = span > 1e-6 ? (t - off_prev) / span : 0.0;
      /* Premultiply, lerp, un-premultiply. */
      float4 p0 = float4 (col_prev.rgb * col_prev.a, col_prev.a);
      float4 p1 = float4 (col.rgb * col.a, col.a);
      float4 pm = mix (p0, p1, f);
      return pm.a > 1e-6 ? float4 (pm.rgb / pm.a, pm.a) : float4 (0.0);
    }
    col_prev = col;
    off_prev = off;
  }
  return col_prev;
}

/* Apply the stored 2x2 M^-1 (row-major i16 Q10) to a vector. */
static float2
_hb_gpu_apply_minv (int4 m, float2 v)
{
  float4 mf = float4 (m) * (1.0 / 1024.0);
  return float2 (mf.x * v.x + mf.y * v.y,
		 mf.z * v.x + mf.w * v.y);
}

/* Sample a linear gradient at renderCoord.
 *
 * Record layout: texel 0 = (p0.x, p0.y, d.x, d.y) with d = p1 - p0;
 * texel 1 = M^-1 (Q10); stops follow at grad_base + 2.
 * The point is mapped back to design space via M^-1 and projected
 * onto d to get the ramp parameter. */
static float4
_hb_gpu_sample_linear (float2 renderCoord,
		       int grad_base, int stop_count, int extend,
		       float4 foreground,
		       device const short4* hb_gpu_atlas)
{
  int4 t0 = int4 (hb_gpu_atlas[grad_base]);
  int4 m = int4 (hb_gpu_atlas[grad_base + 1]);
  float2 p0_r = float2 (float (t0.r), float (t0.g));
  float2 d = float2 (float (t0.b), float (t0.a));

  float denom = dot (d, d);
  if (denom < 1e-6)
    return float4 (0.0); /* degenerate: p0 == p1 */

  float2 p = _hb_gpu_apply_minv (m, renderCoord - p0_r);
  float t = dot (p, d) / denom;
  t = _hb_gpu_extend_t (t, extend);
  return _hb_gpu_eval_stops (hb_gpu_atlas, grad_base + 2, stop_count,
			     t, foreground);
}

/* Sample a radial (two-point conical) gradient at renderCoord.
 *
 * Record layout: texel 0 = (c0.x, c0.y, cd.x, cd.y) with cd = c1 - c0;
 * texel 1 = (r0, r1, _, _); texel 2 = M^-1 (Q10); stops follow at
 * grad_base + 3.  Solves |p - (c0 + t*cd)| = r0 + t*dr for t, keeping
 * the root with a non-negative interpolated radius. */
static float4
_hb_gpu_sample_radial (float2 renderCoord,
		       int grad_base, int stop_count, int extend,
		       float4 foreground,
		       device const short4* hb_gpu_atlas)
{
  int4 t0 = int4 (hb_gpu_atlas[grad_base]);
  int4 t1 = int4 (hb_gpu_atlas[grad_base + 1]);
  int4 m = int4 (hb_gpu_atlas[grad_base + 2]);
  float2 c0_r = float2 (float (t0.r), float (t0.g));
  float2 cd = float2 (float (t0.b), float (t0.a));
  float r0 = float (t1.r);
  float r1 = float (t1.g);
  float dr = r1 - r0;

  float2 p = _hb_gpu_apply_minv (m, renderCoord - c0_r);

  /* Quadratic A*t^2 + B*t + C = 0 from the conical-gradient equation. */
  float A = dot (cd, cd) - dr * dr;
  float B = -2.0 * (dot (p, cd) + r0 * dr);
  float C = dot (p, p) - r0 * r0;

  float t;
  if (abs (A) > 1e-6)
  {
    float disc = B * B - 4.0 * A * C;
    if (disc < 0.0)
      return float4 (0.0); /* point is outside the gradient's cone */
    float sq = sqrt (disc);
    float t1r = (-B + sq) / (2.0 * A);
    float t2r = (-B - sq) / (2.0 * A);
    /* Prefer the root whose interpolated radius is non-negative. */
    t = (r0 + t1r * dr >= 0.0) ? t1r : t2r;
  }
  else
  {
    /* Linear case (|cd| == |dr|). */
    if (abs (B) < 1e-6)
      return float4 (0.0);
    t = -C / B;
  }

  t = _hb_gpu_extend_t (t, extend);
  return _hb_gpu_eval_stops (hb_gpu_atlas, grad_base + 3, stop_count,
			     t, foreground);
}

/* Sample a sweep (angular) gradient at renderCoord.
 *
 * Record layout: texel 0 = (c.x, c.y, a0_q14, a1_q14) with angles
 * stored as Q14 multiples of pi; texel 1 = M^-1 (Q10); stops follow
 * at grad_base + 2.  The fragment angle is normalized to [0,2) in
 * units of pi and remapped onto [a0, a1]. */
static float4
_hb_gpu_sample_sweep (float2 renderCoord,
		      int grad_base, int stop_count, int extend,
		      float4 foreground,
		      device const short4* hb_gpu_atlas)
{
  int4 t0 = int4 (hb_gpu_atlas[grad_base]);
  int4 m = int4 (hb_gpu_atlas[grad_base + 1]);
  float2 c_r = float2 (float (t0.r), float (t0.g));
  float a0 = float (t0.b) / 16384.0;
  float a1 = float (t0.a) / 16384.0;

  float span = a1 - a0;
  if (abs (span) < 1e-6)
    return float4 (0.0); /* degenerate: zero angular extent */

  float2 p = _hb_gpu_apply_minv (m, renderCoord - c_r);
  float ang = atan2 (p.y, p.x) / 3.14159265358979; /* [-1,1], units of pi */
  if (ang < 0.0)
    ang += 2.0; /* -> [0,2) */

  float t = (ang - a0) / span;
  t = _hb_gpu_extend_t (t, extend);
  return _hb_gpu_eval_stops (hb_gpu_atlas, grad_base + 2, stop_count,
			     t, foreground);
}

/* Composite premultiplied src over/with premultiplied dst.
 *
 * Mode numbers match hb_paint_composite_mode_t; SRC_OVER (3) is the
 * default `r` below.  Unsupported separable blend modes are
 * approximated with the nearest mode we do implement; DIFFERENCE /
 * EXCLUSION / HSL_* fall through to SRC_OVER. */
static float4
_hb_gpu_composite (float4 src, float4 dst, int mode)
{
  float4 r = src + dst * (1.0 - src.a); /* SRC_OVER default */

  /* Approximate unsupported modes with the nearest mode we implement. */
  if (mode == 14 || mode == 18 || mode == 19)
    mode = 23; /* OVERLAY / COLOR_BURN / HARD_LIGHT -> MULTIPLY */
  else if (mode == 17 || mode == 20)
    mode = 13; /* COLOR_DODGE / SOFT_LIGHT -> SCREEN */

  if (mode == 0)
    r = float4 (0.0); /* CLEAR */
  else if (mode == 1)
    r = src; /* SRC */
  else if (mode == 2)
    r = dst; /* DST */
  else if (mode == 4)
    r = dst + src * (1.0 - dst.a); /* DST_OVER */
  else if (mode == 5)
    r = src * dst.a; /* SRC_IN */
  else if (mode == 6)
    r = dst * src.a; /* DST_IN */
  else if (mode == 7)
    r = src * (1.0 - dst.a); /* SRC_OUT */
  else if (mode == 8)
    r = dst * (1.0 - src.a); /* DST_OUT */
  else if (mode == 9) /* SRC_ATOP */
    r = src * dst.a + dst * (1.0 - src.a);
  else if (mode == 10) /* DST_ATOP */
    r = dst * src.a + src * (1.0 - dst.a);
  else if (mode == 11) /* XOR */
    r = src * (1.0 - dst.a) + dst * (1.0 - src.a);
  else if (mode == 12) /* PLUS */
    r = min (src + dst, float4 (1.0));
  else if (mode == 13)
  {
    /* SCREEN (premul) */
    r.rgb = src.rgb + dst.rgb - src.rgb * dst.rgb;
    r.a = src.a + dst.a - src.a * dst.a;
  }
  else if (mode == 15)
  {
    /* DARKEN */
    r.rgb = min (src.rgb * dst.a, dst.rgb * src.a)
	    + src.rgb * (1.0 - dst.a)
	    + dst.rgb * (1.0 - src.a);
    r.a = src.a + dst.a - src.a * dst.a;
  }
  else if (mode == 16)
  {
    /* LIGHTEN */
    r.rgb = max (src.rgb * dst.a, dst.rgb * src.a)
	    + src.rgb * (1.0 - dst.a)
	    + dst.rgb * (1.0 - src.a);
    r.a = src.a + dst.a - src.a * dst.a;
  }
  else if (mode == 23)
  {
    /* MULTIPLY (premul) */
    r.rgb = src.rgb * (1.0 - dst.a)
	    + dst.rgb * (1.0 - src.a)
	    + src.rgb * dst.rgb;
    r.a = src.a + dst.a - src.a * dst.a;
  }

  /* SRC_OVER (3) and DIFFERENCE / EXCLUSION / HSL_* (21, 22, 24-27)
   * fall through to the SRC_OVER default. */
  return r;
}

/* Wrap _hb_gpu_slug with a sub-glyph extents bail-out.  Many paint
 * layers cover a small region of the outer glyph quad; for fragments
 * outside the layer's bbox (with an AA + MSAA-spread margin) the slug
 * coverage is exactly 0, so we can skip the band/curve walk entirely. */
float
_hb_gpu_slug_clipped (float2 renderCoord,
		      float2 pixelsPerEm,
		      uint glyphLoc_,
		      device const short4* hb_gpu_atlas)
{
  int4 header0 = hb_gpu_fetch (hb_gpu_atlas, int (glyphLoc_));
  float4 ext = float4 (header0) * HB_GPU_INV_UNITS;
  float2 margin = 2.0 / pixelsPerEm;
  if (any (renderCoord < ext.xy - margin) ||
      any (renderCoord > ext.zw + margin))
    return 0.0;
  return _hb_gpu_slug (renderCoord, pixelsPerEm, glyphLoc_, hb_gpu_atlas);
}

/* Combine slug coverages from all clip outlines on the layer.
 *
 * Factored out so the shader has one set of inlined slug walks instead
 * of two (one per LAYER op type).  flags bits: 0x100 = HAS_CLIP2;
 * 0x200 = HAS_CLIP3 (HAS_CLIP3 implies HAS_CLIP2). */
static float
_hb_gpu_layer_coverage (float2 renderCoord,
			float2 pixelsPerEm,
			int base, int flags,
			int clip1_payload, int clip2_payload, int clip3_payload,
			device const short4* hb_gpu_atlas)
{
  float cov = _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,
				    uint (base + clip1_payload),
				    hb_gpu_atlas);
  if ((flags & 0x100) != 0)
  {
    cov *= _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,
				 uint (base + clip2_payload),
				 hb_gpu_atlas);
    if ((flags & 0x200) != 0)
      cov *= _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,
				   uint (base + clip3_payload),
				   hb_gpu_atlas);
  }
  return cov;
}

/* Maximum PUSH_GROUP nesting; pushes beyond this depth are dropped
 * (the matching POP composites against whatever is on the stack). */
#define HB_GPU_PAINT_GROUP_DEPTH 4

/* Evaluate a paint (COLR-style) glyph at renderCoord.
 *
 * Walks the op list encoded in the atlas at glyphLoc_: solid and
 * gradient layers are composited SRC_OVER into `acc`; PUSH_GROUP /
 * POP_GROUP bracket sub-graphs composited with an arbitrary mode.
 * Returns the premultiplied accumulated color; `coverage` is set to
 * the maximum layer coverage seen (for the caller's AA decisions). */
float4
hb_gpu_paint (float2 renderCoord,
	      uint glyphLoc_,
	      float4 foreground,
	      device const short4* hb_gpu_atlas,
	      thread float &coverage)
{
  /* fwidth once, at uniform control flow. */
  float2 pixelsPerEm = 1.0 / fwidth (renderCoord);

  int base = int (glyphLoc_);
  int4 h0 = int4 (hb_gpu_atlas[base]);     /* (num_ops, _, _, _) */
  int4 h2 = int4 (hb_gpu_atlas[base + 2]); /* (ops_offset, _, _, _) */
  int num_ops = h0.r;
  int cursor = base + h2.r;

  float4 acc = float4 (0.0);
  float4 group_stack[HB_GPU_PAINT_GROUP_DEPTH];
  int sp = 0;
  coverage = 0.0;

  for (int i = 0; i < num_ops; i++)
  {
    int4 op = int4 (hb_gpu_atlas[cursor]);
    int op_type = op.r;
    int aux = op.g;
    /* 32-bit payload split across two i16 lanes (hi, lo). */
    int payload = (op.b << 16) | (op.a & 0xffff);

    if (op_type == 0) /* LAYER_SOLID */
    {
      /* texel 1: (clip2_hi, clip2_lo, clip3_hi, clip3_lo) -- valid
       * per HAS_CLIP2 / HAS_CLIP3 flag bits.
       * texel 2: RGBA as signed Q15. */
      int4 op2 = int4 (hb_gpu_atlas[cursor + 1]);
      int clip2_payload = (op2.r << 16) | (op2.g & 0xffff);
      int clip3_payload = (op2.b << 16) | (op2.a & 0xffff);
      int4 ct = int4 (hb_gpu_atlas[cursor + 2]);
      float4 col = ((aux & 1) != 0) ?
		   float4 (foreground.rgb,
			   foreground.a * (float (ct.a) / 32767.0)) :
		   float4 (ct) / 32767.0;
      float cov = _hb_gpu_layer_coverage (renderCoord, pixelsPerEm,
					  base, aux,
					  payload, clip2_payload, clip3_payload,
					  hb_gpu_atlas);
      coverage = max (coverage, cov);
      float4 src = float4 (col.rgb * col.a, col.a) * cov;
      acc = src + acc * (1.0 - src.a);
      cursor += 3;
    }
    else if (op_type == 1) /* LAYER_GRADIENT */
    {
      int4 op2 = int4 (hb_gpu_atlas[cursor + 1]);
      int clip2_payload = (op2.r << 16) | (op2.g & 0xffff);
      int clip3_payload = (op2.b << 16) | (op2.a & 0xffff);
      /* texel 2: (grad_hi, grad_lo, extend, stop_count). */
      int4 op3 = int4 (hb_gpu_atlas[cursor + 2]);
      int grad_payload = (op3.r << 16) | (op3.g & 0xffff);
      int extend = op3.b;
      int stop_count = op3.a;
      int subtype = aux & 0xff; /* 0 = linear, 1 = radial, 2 = sweep */

      float4 col = float4 (0.0);
      if (subtype == 0)
	col = _hb_gpu_sample_linear (renderCoord, base + grad_payload,
				     stop_count, extend, foreground,
				     hb_gpu_atlas);
      else if (subtype == 1)
	col = _hb_gpu_sample_radial (renderCoord, base + grad_payload,
				     stop_count, extend, foreground,
				     hb_gpu_atlas);
      else if (subtype == 2)
	col = _hb_gpu_sample_sweep (renderCoord, base + grad_payload,
				    stop_count, extend, foreground,
				    hb_gpu_atlas);

      float cov = _hb_gpu_layer_coverage (renderCoord, pixelsPerEm,
					  base, aux,
					  payload, clip2_payload, clip3_payload,
					  hb_gpu_atlas);
      coverage = max (coverage, cov);
      float4 src = float4 (col.rgb * col.a, col.a) * cov;
      acc = src + acc * (1.0 - src.a);
      cursor += 3;
    }
    else if (op_type == 2) /* PUSH_GROUP */
    {
      if (sp < HB_GPU_PAINT_GROUP_DEPTH)
      {
	group_stack[sp] = acc;
	sp++;
      }
      acc = float4 (0.0);
      cursor += 1;
    }
    else if (op_type == 3) /* POP_GROUP */
    {
      if (sp > 0)
      {
	sp--;
	float4 src = acc;
	float4 dst = group_stack[sp];
	acc = _hb_gpu_composite (src, dst, aux); /* aux = composite mode */
      }
      cursor += 1;
    }
    else
    {
      /* Unknown op: stop walking rather than misinterpret the stream. */
      break;
    }
  }

  return acc;
}