/* * Copyright (C) 2026 Behdad Esfahbod * * This is part of HarfBuzz, a text shaping library. * * Permission is hereby granted, without written agreement and without * license or royalty fees, to use, copy, modify, and distribute this * software and its documentation for any purpose, provided that the * above copyright notice and the following two paragraphs appear in * all copies of this software. * * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. */ /* Paint-renderer fragment shader (WGSL). * * Assumes the shared fragment helpers (hb-gpu-fragment.wgsl) and * the draw-renderer fragment helpers (hb-gpu-draw-fragment.wgsl) * are prepended to this source. * * atlas is passed as an explicit storage-buffer pointer parameter, * matching WGSL's scoping rules. 
*/

/* Decode color stop `i` of a gradient stop array.
 *
 * Each stop occupies two consecutive atlas texels:
 *   texel a: (offset Q15, flags, _, _) -- flag bit 0 = use foreground color.
 *   texel b: RGBA as signed Q15 (or alpha-only when the foreground flag is set).
 * Writes the stop offset through `offset` and returns the straight-alpha
 * stop color. */
fn _hb_gpu_stop_color (hb_gpu_atlas: ptr<storage, array<vec4<i32>>, read>,
                       stops_base: i32, i: i32,
                       foreground: vec4f,
                       offset: ptr<function, f32>) -> vec4f
{
  let a = hb_gpu_fetch (hb_gpu_atlas, stops_base + i * 2);
  *offset = f32 (a.r) / 32767.0;
  let b = hb_gpu_fetch (hb_gpu_atlas, stops_base + i * 2 + 1);
  if ((a.g & 1) != 0)
  {
    /* Foreground stop: foreground RGB, foreground alpha scaled by stop alpha. */
    return vec4f (foreground.rgb, foreground.a * (f32 (b.a) / 32767.0));
  }
  return vec4f (b) / 32767.0;
}

/* Map an unbounded gradient parameter t into [0, 1] per the extend mode:
 * 1 = REPEAT, 2 = REFLECT, anything else = PAD (clamp). */
fn _hb_gpu_extend_t (t: f32, extend: i32) -> f32
{
  if (extend == 1) { return t - floor (t); }  /* REPEAT */
  if (extend == 2)
  {
    /* REFLECT: fold t into [0, 2), then mirror the upper half. */
    let u = t - 2.0 * floor (t * 0.5);
    if (u > 1.0) { return 2.0 - u; }
    return u;
  }
  return clamp (t, 0.0, 1.0);  /* PAD */
}

/* Evaluate the gradient color at parameter t (already extended into [0, 1]).
 * Interpolates between adjacent stops in premultiplied-alpha space to avoid
 * fringing across stops of differing alpha, then un-premultiplies. */
fn _hb_gpu_eval_stops (hb_gpu_atlas: ptr<storage, array<vec4<i32>>, read>,
                       stops_base: i32, stop_count: i32,
                       t: f32, foreground: vec4f) -> vec4f
{
  var off_prev: f32;
  var col_prev = _hb_gpu_stop_color (hb_gpu_atlas, stops_base, 0, foreground, &off_prev);
  if (t <= off_prev) { return col_prev; }  /* Before the first stop. */
  for (var i: i32 = 1; i < stop_count; i = i + 1)
  {
    var off: f32;
    let col = _hb_gpu_stop_color (hb_gpu_atlas, stops_base, i, foreground, &off);
    if (t <= off)
    {
      let span = off - off_prev;
      var f: f32 = 0.0;
      if (span > 1e-6) { f = (t - off_prev) / span; }
      /* Lerp premultiplied, then un-premultiply. */
      let p0 = vec4f (col_prev.rgb * col_prev.a, col_prev.a);
      let p1 = vec4f (col.rgb * col.a, col.a);
      let pm = mix (p0, p1, f);
      if (pm.a > 1e-6) { return vec4f (pm.rgb / pm.a, pm.a); }
      return vec4f (0.0);
    }
    col_prev = col;
    off_prev = off;
  }
  /* Past the last stop. */
  return col_prev;
}

/* Apply the stored 2x2 M^-1 (row-major i16 Q10) to a vector.
*/
fn _hb_gpu_apply_minv (m: vec4<i32>, v: vec2f) -> vec2f
{
  let mf = vec4f (m) * (1.0 / 1024.0);  /* Q10 -> float */
  return vec2f (mf.x * v.x + mf.y * v.y,
                mf.z * v.x + mf.w * v.y);
}

/* COLR linear gradient.  Layout at grad_base:
 *   texel 0: (p0.x, p0.y, d.x, d.y) -- origin and projection direction.
 *   texel 1: 2x2 inverse paint transform (row-major i16 Q10).
 *   texel 2...: color stops. */
fn _hb_gpu_sample_linear (renderCoord: vec2f,
                          grad_base: i32, stop_count: i32, extend: i32,
                          foreground: vec4f,
                          hb_gpu_atlas: ptr<storage, array<vec4<i32>>, read>) -> vec4f
{
  let t0 = hb_gpu_fetch (hb_gpu_atlas, grad_base);
  let m = hb_gpu_fetch (hb_gpu_atlas, grad_base + 1);
  let p0_r = vec2f (f32 (t0.r), f32 (t0.g));
  let d = vec2f (f32 (t0.b), f32 (t0.a));
  let denom = dot (d, d);
  if (denom < 1e-6) { return vec4f (0.0); }  /* Degenerate direction. */
  let p = _hb_gpu_apply_minv (m, renderCoord - p0_r);
  var t = dot (p, d) / denom;  /* Project onto the gradient axis. */
  t = _hb_gpu_extend_t (t, extend);
  return _hb_gpu_eval_stops (hb_gpu_atlas, grad_base + 2, stop_count, t, foreground);
}

/* COLR radial (two-point conical) gradient.  Layout at grad_base:
 *   texel 0: (c0.x, c0.y, cd.x, cd.y) -- first center and center delta.
 *   texel 1: (r0, r1, _, _)           -- start / end radii.
 *   texel 2: 2x2 inverse paint transform (row-major i16 Q10).
 *   texel 3...: color stops.
 * Solves the standard quadratic A t^2 + B t + C = 0 for the circle
 * through the fragment, preferring the root with non-negative radius. */
fn _hb_gpu_sample_radial (renderCoord: vec2f,
                          grad_base: i32, stop_count: i32, extend: i32,
                          foreground: vec4f,
                          hb_gpu_atlas: ptr<storage, array<vec4<i32>>, read>) -> vec4f
{
  let t0 = hb_gpu_fetch (hb_gpu_atlas, grad_base);
  let t1 = hb_gpu_fetch (hb_gpu_atlas, grad_base + 1);
  let m = hb_gpu_fetch (hb_gpu_atlas, grad_base + 2);
  let c0_r = vec2f (f32 (t0.r), f32 (t0.g));
  let cd = vec2f (f32 (t0.b), f32 (t0.a));
  let r0 = f32 (t1.r);
  let r1 = f32 (t1.g);
  let dr = r1 - r0;
  let p = _hb_gpu_apply_minv (m, renderCoord - c0_r);
  let A = dot (cd, cd) - dr * dr;
  let B = -2.0 * (dot (p, cd) + r0 * dr);
  let C = dot (p, p) - r0 * r0;
  var t: f32;
  if (abs (A) > 1e-6)
  {
    let disc = B * B - 4.0 * A * C;
    if (disc < 0.0) { return vec4f (0.0); }  /* Fragment outside the cone. */
    let sq = sqrt (disc);
    let t1r = (-B + sq) / (2.0 * A);
    let t2r = (-B - sq) / (2.0 * A);
    if (r0 + t1r * dr >= 0.0) { t = t1r; }
    else { t = t2r; }
  }
  else
  {
    /* Linear (degenerate-quadratic) case. */
    if (abs (B) < 1e-6) { return vec4f (0.0); }
    t = -C / B;
  }
  t = _hb_gpu_extend_t (t, extend);
  return _hb_gpu_eval_stops (hb_gpu_atlas, grad_base + 3, stop_count, t, foreground);
}

/* COLR sweep gradient.  Layout at grad_base:
 *   texel 0: (c.x, c.y, a0 Q14, a1 Q14) -- center and start/end angles
 *            (angles in units of pi; presumably Q14 -- scale 16384).
 *   texel 1: 2x2 inverse paint transform (row-major i16 Q10).
 *   texel 2...: color stops. */
fn _hb_gpu_sample_sweep (renderCoord: vec2f,
                         grad_base: i32, stop_count: i32, extend: i32,
                         foreground: vec4f,
                         hb_gpu_atlas: ptr<storage, array<vec4<i32>>, read>) -> vec4f
{
  let t0 = hb_gpu_fetch (hb_gpu_atlas, grad_base);
  let m = hb_gpu_fetch (hb_gpu_atlas, grad_base + 1);
  let c_r = vec2f (f32 (t0.r), f32 (t0.g));
  let a0 = f32 (t0.b) / 16384.0;
  let a1 = f32 (t0.a) / 16384.0;
  let span = a1 - a0;
  if (abs (span) < 1e-6) { return vec4f (0.0); }  /* Degenerate sweep. */
  let p = _hb_gpu_apply_minv (m, renderCoord - c_r);
  /* Fragment angle in units of pi, normalized into [0, 2). */
  var ang = atan2 (p.y, p.x) / 3.14159265358979;
  if (ang < 0.0) { ang = ang + 2.0; }
  var t = (ang - a0) / span;
  t = _hb_gpu_extend_t (t, extend);
  return _hb_gpu_eval_stops (hb_gpu_atlas, grad_base + 2, stop_count, t, foreground);
}

/* Porter-Duff / separable-blend compositing of premultiplied src over dst.
 * Mode numbers match hb_paint_composite_mode_t. */
fn _hb_gpu_composite (src: vec4f, dst: vec4f, mode_in: i32) -> vec4f
{
  var r = src + dst * (1.0 - src.a);  /* SRC_OVER default */
  /* Approximate unsupported modes with the nearest Porter-Duff mode we
   * do implement; DIFFERENCE / EXCLUSION / HSL_* still fall through to
   * SRC_OVER below. */
  var mode = mode_in;
  if (mode == 14 || mode == 18 || mode == 19) { mode = 23; }  /* OVERLAY / COLOR_BURN / HARD_LIGHT -> MULTIPLY */
  else if (mode == 17 || mode == 20) { mode = 13; }           /* COLOR_DODGE / SOFT_LIGHT -> SCREEN */

  if (mode == 0) { r = vec4f (0.0); }                                    /* CLEAR */
  else if (mode == 1) { r = src; }                                       /* SRC */
  else if (mode == 2) { r = dst; }                                       /* DST */
  else if (mode == 4) { r = dst + src * (1.0 - dst.a); }                 /* DST_OVER */
  else if (mode == 5) { r = src * dst.a; }                               /* SRC_IN */
  else if (mode == 6) { r = dst * src.a; }                               /* DST_IN */
  else if (mode == 7) { r = src * (1.0 - dst.a); }                       /* SRC_OUT */
  else if (mode == 8) { r = dst * (1.0 - src.a); }                       /* DST_OUT */
  else if (mode == 9) { r = src * dst.a + dst * (1.0 - src.a); }         /* SRC_ATOP */
  else if (mode == 10) { r = dst * src.a + src * (1.0 - dst.a); }        /* DST_ATOP */
  else if (mode == 11) { r = src * (1.0 - dst.a) + dst * (1.0 - src.a); } /* XOR */
  else if (mode == 12) { r = min (src + dst, vec4f (1.0)); }             /* PLUS */
  else if (mode == 13)
  {
    /* SCREEN (premul) */
    r = vec4f (src.rgb + dst.rgb - src.rgb * dst.rgb,
               src.a + dst.a - src.a * dst.a);
  }
  else if (mode == 15)
  {
    /* DARKEN */
    r = vec4f (min (src.rgb * dst.a, dst.rgb * src.a) +
               src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a),
               src.a + dst.a - src.a * dst.a);
  }
  else if (mode == 16)
  {
    /* LIGHTEN */
    r = vec4f (max (src.rgb * dst.a, dst.rgb * src.a) +
               src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a),
               src.a + dst.a - src.a * dst.a);
  }
  else if (mode == 23)
  {
    /* MULTIPLY (premul) */
    r = vec4f (src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a) + src.rgb * dst.rgb,
               src.a + dst.a - src.a * dst.a);
  }
  /* SRC_OVER (3) and DIFFERENCE / EXCLUSION / HSL_* (21, 22, 24-27)
   * fall through to the SRC_OVER default. */
  return r;
}

/* Wrap _hb_gpu_slug with a sub-glyph extents bail-out.  Many paint
 * layers cover a small region of the outer glyph quad; for fragments
 * outside the layer's bbox (with an AA + MSAA-spread margin) the slug
 * coverage is exactly 0, so we can skip the band/curve walk entirely. */
fn _hb_gpu_slug_clipped (renderCoord: vec2f, pixelsPerEm: vec2f,
                         glyphLoc_: u32,
                         hb_gpu_atlas: ptr<storage, array<vec4<i32>>, read>) -> f32
{
  let header0 = hb_gpu_fetch (hb_gpu_atlas, i32 (glyphLoc_));
  let ext = vec4f (header0) * HB_GPU_INV_UNITS;  /* (min.x, min.y, max.x, max.y) */
  let margin = 2.0 / pixelsPerEm;
  if (any (renderCoord < ext.xy - margin) ||
      any (renderCoord > ext.zw + margin))
  {
    return 0.0;
  }
  return _hb_gpu_slug (renderCoord, pixelsPerEm, glyphLoc_, hb_gpu_atlas);
}

/* Combine slug coverages from all clip outlines on the layer.
 * Factored so the shader has one set of inlined slug walks
 * instead of two (one per LAYER op type).  flags bits: 0x100 =
 * HAS_CLIP2; 0x200 = HAS_CLIP3 (HAS_CLIP3 implies HAS_CLIP2).
*/
fn _hb_gpu_layer_coverage (renderCoord: vec2f, pixelsPerEm: vec2f,
                           base: i32, flags: i32,
                           clip1_payload: i32, clip2_payload: i32, clip3_payload: i32,
                           hb_gpu_atlas: ptr<storage, array<vec4<i32>>, read>) -> f32
{
  var cov = _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,
                                  u32 (base + clip1_payload), hb_gpu_atlas);
  if ((flags & 0x100) != 0)
  {
    cov = cov * _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,
                                      u32 (base + clip2_payload), hb_gpu_atlas);
    if ((flags & 0x200) != 0)
    {
      cov = cov * _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,
                                        u32 (base + clip3_payload), hb_gpu_atlas);
    }
  }
  return cov;
}

/* Maximum nesting of PUSH_GROUP / POP_GROUP pairs. */
const HB_GPU_PAINT_GROUP_DEPTH: i32 = 4;

/* Walk the paint op list for a color glyph and return the composited
 * straight-alpha color for this fragment.  `coverage` receives the
 * maximum clip coverage seen across layers.
 *
 * Op stream (starting at atlas index base + ops_offset), one or more
 * texels per op, first texel = (op_type, aux/flags, payload_hi, payload_lo):
 *   0 = LAYER_SOLID, 1 = LAYER_GRADIENT, 2 = PUSH_GROUP, 3 = POP_GROUP. */
fn hb_gpu_paint (renderCoord: vec2f, glyphLoc_: u32, foreground: vec4f,
                 hb_gpu_atlas: ptr<storage, array<vec4<i32>>, read>,
                 coverage: ptr<function, f32>) -> vec4f
{
  /* Compute pixelsPerEm once here at uniform control flow.  WGSL
   * rejects fwidth inside a loop-conditional branch, so we call
   * _hb_gpu_slug (the MSAA-aware implementation that takes a
   * pre-computed pixelsPerEm) instead of the top-level hb_gpu_draw()
   * which would re-call fwidth. */
  let emsPerPixel = fwidth (renderCoord);
  let pixelsPerEm = 1.0 / emsPerPixel;

  let base = i32 (glyphLoc_);
  let h0 = hb_gpu_fetch (hb_gpu_atlas, base);     // (num_ops, _, _, _)
  let h2 = hb_gpu_fetch (hb_gpu_atlas, base + 2); // (ops_offset, _, _, _)
  let num_ops = h0.r;
  var cursor = base + h2.r;

  var acc = vec4f (0.0);  /* Premultiplied accumulator. */
  var group_stack: array<vec4f, HB_GPU_PAINT_GROUP_DEPTH>;
  var sp: i32 = 0;
  *coverage = 0.0;

  for (var i: i32 = 0; i < num_ops; i = i + 1)
  {
    let op = hb_gpu_fetch (hb_gpu_atlas, cursor);
    let op_type = op.r;
    let aux = op.g;
    let payload = (op.b << 16) | (op.a & 0xffff);

    if (op_type == 0)
    {
      // LAYER_SOLID
      // texel 1: (clip2_hi, clip2_lo, clip3_hi, clip3_lo) -- valid
      // per HAS_CLIP2 / HAS_CLIP3 flag bits.
      // texel 2: RGBA as signed Q15.
      let op2 = hb_gpu_fetch (hb_gpu_atlas, cursor + 1);
      let clip2_payload = (op2.r << 16) | (op2.g & 0xffff);
      let clip3_payload = (op2.b << 16) | (op2.a & 0xffff);
      let ct = hb_gpu_fetch (hb_gpu_atlas, cursor + 2);
      var col: vec4f;
      if ((aux & 1) != 0)
      {
        /* Foreground-color layer, alpha scaled by the stored Q15 alpha. */
        col = vec4f (foreground.rgb, foreground.a * (f32 (ct.a) / 32767.0));
      }
      else
      {
        col = vec4f (ct) / 32767.0;
      }
      let cov = _hb_gpu_layer_coverage (renderCoord, pixelsPerEm, base, aux,
                                        payload, clip2_payload, clip3_payload,
                                        hb_gpu_atlas);
      *coverage = max (*coverage, cov);
      let src = vec4f (col.rgb * col.a, col.a) * cov;
      acc = src + acc * (1.0 - src.a);  /* SRC_OVER within the group. */
      cursor = cursor + 3;
    }
    else if (op_type == 1)
    {
      // LAYER_GRADIENT
      let op2 = hb_gpu_fetch (hb_gpu_atlas, cursor + 1);
      let clip2_payload = (op2.r << 16) | (op2.g & 0xffff);
      let clip3_payload = (op2.b << 16) | (op2.a & 0xffff);
      let op3 = hb_gpu_fetch (hb_gpu_atlas, cursor + 2);
      let grad_payload = (op3.r << 16) | (op3.g & 0xffff);
      let extend = op3.b;
      let stop_count = op3.a;
      let subtype = aux & 0xff;  /* 0 = linear, 1 = radial, 2 = sweep. */
      var col = vec4f (0.0);
      if (subtype == 0)
      {
        col = _hb_gpu_sample_linear (renderCoord, base + grad_payload,
                                     stop_count, extend, foreground, hb_gpu_atlas);
      }
      else if (subtype == 1)
      {
        col = _hb_gpu_sample_radial (renderCoord, base + grad_payload,
                                     stop_count, extend, foreground, hb_gpu_atlas);
      }
      else if (subtype == 2)
      {
        col = _hb_gpu_sample_sweep (renderCoord, base + grad_payload,
                                    stop_count, extend, foreground, hb_gpu_atlas);
      }
      let cov = _hb_gpu_layer_coverage (renderCoord, pixelsPerEm, base, aux,
                                        payload, clip2_payload, clip3_payload,
                                        hb_gpu_atlas);
      *coverage = max (*coverage, cov);
      let src = vec4f (col.rgb * col.a, col.a) * cov;
      acc = src + acc * (1.0 - src.a);
      cursor = cursor + 3;
    }
    else if (op_type == 2)
    {
      // PUSH_GROUP: save the accumulator and start compositing fresh.
      if (sp < HB_GPU_PAINT_GROUP_DEPTH)
      {
        group_stack[sp] = acc;
        sp = sp + 1;
      }
      acc = vec4f (0.0);
      cursor = cursor + 1;
    }
    else if (op_type == 3)
    {
      // POP_GROUP: composite the group onto the saved accumulator
      // with the mode carried in aux.
      if (sp > 0)
      {
        sp = sp - 1;
        let src = acc;
        let dst = group_stack[sp];
        acc = _hb_gpu_composite (src, dst, aux);
      }
      cursor = cursor + 1;
    }
    else
    {
      break;  /* Unknown op: stop walking rather than misparse. */
    }
  }
  return acc;
}