/*
 * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
 *             2005 Lars Knoll & Zack Rusin, Trolltech
 *             2024 Filip Wasil, Samsung Electronics
 *             2024 Bernard Gingold, Samsung Electronics
 *             2025 Marek Pikuła, Samsung Electronics
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Keith Packard not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission. Keith Packard makes no
 * representations about the suitability of this software for any purpose. It
 * is provided "as is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 */

#ifdef HAVE_CONFIG_H
#include <pixman-config.h>
#endif

#include "pixman-combine-float.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
#include "pixman-private.h"

/* System headers required by the intrinsics and types used below. */
#include <float.h>
#include <riscv_vector.h>
#include <stddef.h>
#include <stdint.h>

// Convenience macros {

/*
 * Strip-mining helpers: iterate over total_len elements, letting vsetvl pick
 * the chunk size vl for every pass and advancing one, two or three pointers
 * by vl elements per iteration.
 */
#define __FE_PTR(p, vl) ((p) += (vl))

#define _RVV_FE_PRE(total_len, vn, vl, vspec)                                  \
    size_t vn = total_len, vl = __riscv_vsetvl_##vspec (vn);                   \
    vn > 0
#define _RVV_FE_POST(vn, vl, vspec) vn -= (vl), vl = __riscv_vsetvl_##vspec (vn)

#define RVV_FOREACH_1(total_len, vl, vspec, p1)                                \
    for (_RVV_FE_PRE (total_len, vn, vl, vspec);                               \
         __FE_PTR (p1, vl), _RVV_FE_POST (vn, vl, vspec))

#define RVV_FOREACH_2(total_len, vl, vspec, p1, p2)                            \
    for (_RVV_FE_PRE (total_len, vn, vl, vspec);                               \
         __FE_PTR (p1, vl), __FE_PTR (p2, vl), _RVV_FE_POST (vn, vl, vspec))

#define RVV_FOREACH_3(total_len, vl, vspec, p1, p2, p3)                        \
    for (_RVV_FE_PRE (total_len, vn, vl, vspec);                               \
         __FE_PTR (p1, vl), __FE_PTR (p2, vl), __FE_PTR (p3, vl),              \
         _RVV_FE_POST (vn, vl, vspec))

// vuintXXmYY_t for use in macros (less token concatenation).
#define VUINT(ELEN, LMUL) vuint##ELEN##LMUL##_t
#define VUINT32(LMUL) VUINT (32, LMUL)
#define VUINT16(LMUL) VUINT (16, LMUL)
#define VUINT8(LMUL) VUINT (8, LMUL)

// Short for vreinterpret commonly used for ARGB batch operations.  A packed
// a8r8g8b8 vector is reinterpreted as four times as many u8 lanes, which is
// why the 8-bit helpers below pass (vl) * 4.
#define RVV_U8x4_U32(LMUL, value) \ __riscv_vreinterpret_v_u8##LMUL##_u32##LMUL (value) #define RVV_U8x4_U32_m2(value) RVV_U8x4_U32 (m2, value) #define RVV_U8x4_U32_m4(value) RVV_U8x4_U32 (m4, value) #define RVV_U32_U8x4(LMUL, value) \ __riscv_vreinterpret_v_u32##LMUL##_u8##LMUL (value) #define RVV_U32_U8x4_m2(value) RVV_U32_U8x4 (m2, value) #define RVV_U32_U8x4_m4(value) RVV_U32_U8x4 (m4, value) // } // Float implementation /* * Screen * * ad * as * B(d/ad, s/as) * = ad * as * (d/ad + s/as - s/as * d/ad) * = ad * s + as * d - s * d */ static force_inline vfloat32m1_t rvv_blend_screen_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { vfloat32m1_t t0, t1, t2; t0 = __riscv_vfmul_vv_f32m1 (s, da, vl); t1 = __riscv_vfmul_vv_f32m1 (d, sa, vl); t2 = __riscv_vfmul_vv_f32m1 (s, d, vl); return __riscv_vfsub_vv_f32m1 (__riscv_vfadd_vv_f32m1 (t0, t1, vl), t2, vl); } /* * Multiply * * ad * as * B(d / ad, s / as) * = ad * as * d/ad * s/as * = d * s * */ static force_inline vfloat32m1_t rvv_blend_multiply_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { return __riscv_vfmul_vv_f32m1 (s, d, vl); } /* * Overlay * * ad * as * B(d/ad, s/as) * = ad * as * Hardlight (s, d) * = if (d / ad < 0.5) * as * ad * Multiply (s/as, 2 * d/ad) * else * as * ad * Screen (s/as, 2 * d / ad - 1) * = if (d < 0.5 * ad) * as * ad * s/as * 2 * d /ad * else * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1)) * = if (2 * d < ad) * 2 * s * d * else * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1) * = if (2 * d < ad) * 2 * s * d * else * as * ad - 2 * (ad - d) * (as - s) */ static force_inline vfloat32m1_t rvv_blend_overlay_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { vfloat32m1_t t0, t1, t2, t3, t4, f0, f1, f2; vbool32_t vb; t0 = __riscv_vfadd_vv_f32m1 (d, d, vl); t1 = __riscv_vfmul_vv_f32m1 (__riscv_vfadd_vv_f32m1 (s, s, vl), d, vl); vb = __riscv_vmflt_vv_f32m1_b32 (t0, da, vl); t2 = __riscv_vfmul_vv_f32m1 (sa, da, vl); f2 = __riscv_vfsub_vv_f32m1 (da, d, vl); t3 = __riscv_vfmul_vf_f32m1 (f2, 2.0f, vl); t4 = __riscv_vfsub_vv_f32m1 (sa, s, vl); f0 = __riscv_vfmul_vv_f32m1 (t3, t4, vl); f1 = __riscv_vfsub_vv_f32m1 (t2, f0, vl); return __riscv_vmerge_vvm_f32m1 (f1, t1, vb, vl); } /* * Darken * * ad * as * B(d/ad, s/as) * = ad * as * MIN(d/ad, s/as) * = MIN (as * d, ad * s) */ static force_inline vfloat32m1_t rvv_blend_darken_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { vfloat32m1_t ss, dd; vbool32_t vb; ss = __riscv_vfmul_vv_f32m1 (da, s, vl); dd = __riscv_vfmul_vv_f32m1 (sa, d, vl); vb = __riscv_vmfgt_vv_f32m1_b32 (ss, dd, vl); return __riscv_vmerge_vvm_f32m1 (ss, dd, vb, vl); } /* * Lighten * * ad * as * B(d/ad, s/as) * = ad * as * MAX(d/ad, s/as) * = MAX (as * d, ad * s) */ static force_inline vfloat32m1_t rvv_blend_lighten_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { vfloat32m1_t ss, dd; vbool32_t vb; ss = __riscv_vfmul_vv_f32m1 (s, da, vl); dd = __riscv_vfmul_vv_f32m1 (d, sa, vl); vb = __riscv_vmfgt_vv_f32m1_b32 (ss, dd, vl); return __riscv_vmerge_vvm_f32m1 (dd, ss, vb, vl); } /* * Color dodge * * ad * as * B(d/ad, s/as) * = if d/ad = 0 * ad * as * 0 * else if (d/ad >= (1 - s/as) * ad * as * 1 * else * ad * as * ((d/ad) / (1 - s/as)) * = if d = 0 * 0 * elif as * d >= ad * (as - s) * ad * as * 
else * as * (as * d / (as - s)) * */ static force_inline vfloat32m1_t rvv_blend_color_dodge_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { vfloat32m1_t t0, t1, t2, t3, t4; vbool32_t is_d_zero, vb, is_t0_non_zero; is_d_zero = __riscv_vmfeq_vf_f32m1_b32 (d, 0.0f, vl); t0 = __riscv_vfsub_vv_f32m1 (sa, s, vl); // sa - s t1 = __riscv_vfmul_vv_f32m1 (sa, d, vl); // d * sa t2 = __riscv_vfmul_vv_f32m1 (sa, da, vl); // sa * da t3 = __riscv_vfsub_vv_f32m1 (t2, __riscv_vfmul_vv_f32m1 (s, da, vl), vl); // sa * da - s * da is_t0_non_zero = __riscv_vmfne_vf_f32m1_b32 (t0, 0.0f, vl); vb = __riscv_vmflt_vv_f32m1_b32 (t3, t1, vl); t4 = __riscv_vfdiv_vv_f32m1 (__riscv_vfmul_vv_f32m1 (sa, t1, vl), t0, vl); // sa * sa * d / (sa - s); return __riscv_vfmerge_vfm_f32m1 ( __riscv_vmerge_vvm_f32m1 ( __riscv_vmerge_vvm_f32m1 (t2, t4, is_t0_non_zero, vl), t2, vb, vl), 0.0f, is_d_zero, vl); } /* * Color burn * * We modify the first clause "if d = 1" to "if d >= 1" since with * premultiplied colors d > 1 can actually happen. * * ad * as * B(d/ad, s/as) * = if d/ad >= 1 * ad * as * 1 * elif (1 - d/ad) >= s/as * ad * as * 0 * else * ad * as * (1 - ((1 - d/ad) / (s/as))) * = if d >= ad * ad * as * elif as * ad - as * d >= ad * s * 0 * else * ad * as - as * as * (ad - d) / s */ static force_inline vfloat32m1_t rvv_blend_color_burn_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { vfloat32m1_t t0, t1, t2, t3, t4, t5, t6, t7; vbool32_t is_d_ge_da, is_s_zero, vb; is_d_ge_da = __riscv_vmfge_vv_f32m1_b32 (d, da, vl); is_s_zero = __riscv_vmfeq_vf_f32m1_b32 (s, 0.0f, vl); t0 = __riscv_vfmul_vv_f32m1 (sa, __riscv_vfsub_vv_f32m1 (da, d, vl), vl); // sa * (da - d) t1 = __riscv_vfsub_vv_f32m1 (da, __riscv_vfdiv_vv_f32m1 (t0, s, vl), vl); // da - sa * (da - d) / s) t2 = __riscv_vfmul_vv_f32m1 (sa, da, vl); // sa * da t3 = __riscv_vfmul_vv_f32m1 (sa, t1, vl); // sa * (da - sa * (da - d) / s) t4 = __riscv_vfmul_vv_f32m1 (s, da, vl); // s * da vb = __riscv_vmfge_vf_f32m1_b32 (__riscv_vfsub_vv_f32m1 (t0, t4, vl), 0.0f, vl); // if (sa * (da - d) - s * da >= 0.0f) t6 = __riscv_vfmerge_vfm_f32m1 (t3, 0.0f, is_s_zero, vl); t5 = __riscv_vfmerge_vfm_f32m1 (t6, 0.0f, vb, vl); t7 = __riscv_vmerge_vvm_f32m1 (t5, t2, is_d_ge_da, vl); return t7; } /* * Hard light * * ad * as * B(d/ad, s/as) * = if (s/as <= 0.5) * ad * as * Multiply (d/ad, 2 * s/as) * else * ad * as * Screen (d/ad, 2 * s/as - 1) * = if 2 * s <= as * ad * as * d/ad * 2 * s / as * else * ad * as * (d/ad + (2 * s/as - 1) + d/ad * (2 * s/as - 1)) * = if 2 * s <= as * 2 * s * d * else * as * ad - 2 * (ad - d) * (as - s) */ static force_inline vfloat32m1_t rvv_blend_hard_light_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { vfloat32m1_t t0, t1, t2, t3, t4; vbool32_t vb; t0 = __riscv_vfadd_vv_f32m1 (s, s, vl); t1 = __riscv_vfmul_vv_f32m1 (__riscv_vfadd_vv_f32m1 (s, s, vl), d, vl); vb = __riscv_vmfgt_vv_f32m1_b32 (t0, sa, vl); t2 = __riscv_vfmul_vv_f32m1 (sa, da, vl); t3 = __riscv_vfmul_vf_f32m1 (__riscv_vfsub_vv_f32m1 (da, d, vl), 2.0f, vl); t4 = __riscv_vfsub_vv_f32m1 (sa, s, vl); return __riscv_vmerge_vvm_f32m1 ( t1, __riscv_vfsub_vv_f32m1 (t2, __riscv_vfmul_vv_f32m1 (t3, t4, vl), vl), vb, vl); } /* * Soft light * * ad * as * B(d/ad, s/as) * = if (s/as <= 0.5) * ad * as * (d/ad - (1 - 2 * s/as) * d/ad * (1 - d/ad)) * else if (d/ad <= 0.25) * ad * as * (d/ad + (2 * s/as - 1) * ((((16 * d/ad - 12) * d/ad + 
4) * d/ad) - d/ad)) * else * ad * as * (d/ad + (2 * s/as - 1) * sqrt (d/ad)) * = if (2 * s <= as) * d * as - d * (ad - d) * (as - 2 * s) / ad; * else if (4 * d <= ad) * (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3); * else * d * as + (sqrt (d * ad) - d) * (2 * s - as); */ static force_inline vfloat32m1_t rvv_blend_soft_light_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { vfloat32m1_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13; vbool32_t is_sa_lt_2s, is_da_ls_4d, is_da_non_zero; is_da_non_zero = __riscv_vmfne_vf_f32m1_b32 (da, 0.0f, vl); t0 = __riscv_vfadd_vv_f32m1 (s, s, vl); // 2 * s is_sa_lt_2s = __riscv_vmflt_vv_f32m1_b32 (sa, t0, vl); t1 = __riscv_vfmul_vv_f32m1 (sa, d, vl); // d * sa t2 = __riscv_vfsub_vv_f32m1 (sa, t0, vl); // (sa - 2*s) t3 = __riscv_vfmul_vv_f32m1 (d, t2, vl); // (sa - 2*s) * d t7 = __riscv_vfdiv_vv_f32m1 (__riscv_vfmul_vf_f32m1 (d, 16.0f, vl), da, vl); // 16 * d / da t8 = __riscv_vfmul_vv_f32m1 (d, __riscv_vfsub_vf_f32m1 (t7, 12.0f, vl), vl); // (16 * d / da - 12) * d t9 = __riscv_vfadd_vf_f32m1 (__riscv_vfdiv_vv_f32m1 (t8, da, vl), 3.0f, vl); // (16 * d / da - 12) * d / da + 3) t4 = __riscv_vfmul_vv_f32m1 ( t3, t9, vl); // (sa - 2*s) * d * ((16 * d / da - 12) * d / da + 3) t5 = __riscv_vfsub_vv_f32m1 ( t1, t4, vl); // d * sa - (sa - 2*s) * d * ((16 * d / da - 12) * d / da + 3) t6 = __riscv_vfadd_vv_f32m1 (__riscv_vfadd_vv_f32m1 (d, d, vl), __riscv_vfadd_vv_f32m1 (d, d, vl), vl); is_da_ls_4d = __riscv_vmflt_vv_f32m1_b32 (da, t6, vl); t10 = __riscv_vfsub_vv_f32m1 ( __riscv_vfsqrt_v_f32m1 (__riscv_vfmul_vv_f32m1 (d, da, vl), vl), d, vl); // sqrtf (d * da) - d t11 = __riscv_vfmul_vv_f32m1 (t2, t10, vl); // (sqrtf (d * da) - d) * (sa - 2 * s) t12 = __riscv_vfsub_vv_f32m1 ( t1, t11, vl); // d * sa - (sqrtf (d * da) - d) * (sa - 2 * s) // d * sa - d * (da - d) * (sa - 2 * s) / da t13 = __riscv_vfsub_vv_f32m1 ( t1, __riscv_vfdiv_vv_f32m1 ( __riscv_vfmul_vv_f32m1 (__riscv_vfmul_vv_f32m1 (d, t2, vl), __riscv_vfsub_vv_f32m1 (da, d, vl), vl), da, vl), vl); return __riscv_vmerge_vvm_f32m1 ( t1, // if (!FLOAT_IS_ZERO (da)) __riscv_vmerge_vvm_f32m1 ( t13, // if (4 * d > da) __riscv_vmerge_vvm_f32m1 (t5, t12, is_da_ls_4d, vl), is_sa_lt_2s, vl), is_da_non_zero, vl); } /* * Difference * * ad * as * B(s/as, d/ad) * = ad * as * abs (s/as - d/ad) * = if (s/as <= d/ad) * ad * as * (d/ad - s/as) * else * ad * as * (s/as - d/ad) * = if (ad * s <= as * d) * as * d - ad * s * else * ad * s - as * d */ static force_inline vfloat32m1_t rvv_blend_difference_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { vfloat32m1_t dsa, sda; vbool32_t vb; dsa = __riscv_vfmul_vv_f32m1 (d, sa, vl); sda = __riscv_vfmul_vv_f32m1 (s, da, vl); vb = __riscv_vmflt_vv_f32m1_b32 (sda, dsa, vl); return __riscv_vmerge_vvm_f32m1 (__riscv_vfsub_vv_f32m1 (sda, dsa, vl), __riscv_vfsub_vv_f32m1 (dsa, sda, vl), vb, vl); } /* * Exclusion * * ad * as * B(s/as, d/ad) * = ad * as * (d/ad + s/as - 2 * d/ad * s/as) * = as * d + ad * s - 2 * s * d */ static force_inline vfloat32m1_t rvv_blend_exclusion_float (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl) { vfloat32m1_t t0, t1; t0 = __riscv_vfmul_vv_f32m1 (__riscv_vfadd_vv_f32m1 (d, d, vl), s, vl); t1 = __riscv_vfadd_vv_f32m1 (__riscv_vfmul_vv_f32m1 (s, da, vl), __riscv_vfmul_vv_f32m1 (d, sa, vl), vl); return __riscv_vfsub_vv_f32m1 (t1, t0, vl); } typedef vfloat32m1_t 
(*rvv_combine_channel_float_t) (const vfloat32m1_t sa, const vfloat32m1_t s, const vfloat32m1_t da, const vfloat32m1_t d, size_t vl); static force_inline void rvv_combine_inner_float (pixman_bool_t component, float *dest, const float *src, const float *mask, int n_pixels, rvv_combine_channel_float_t combine_a, rvv_combine_channel_float_t combine_c) { float *__restrict__ pd = dest; const float *__restrict__ ps = src; const float *__restrict__ pm = mask; const int component_count = 4; int vn = component_count * n_pixels; int vl = 0; int vl_step = 0; const ptrdiff_t stride = component_count * sizeof (float); vfloat32m1x4_t sa_sr_sg_sb, da_dr_dg_db, ma_mr_mg_mb; vfloat32m1_t da2, dr2, dg2, db2, ma2, mr2, mg2, mb2, sr2, sg2, sb2, sa2; if (n_pixels == 0) { return; } if (!mask) { for (; vn > 0; vn -= vl_step, pd += vl_step, ps += vl_step) { vl = __riscv_vsetvl_e32m1 (vn / component_count); sa_sr_sg_sb = __riscv_vlseg4e32_v_f32m1x4 (ps, vl); da_dr_dg_db = __riscv_vlseg4e32_v_f32m1x4 (pd, vl); da2 = combine_a (__riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 0), __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), vl); dr2 = combine_c (__riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 0), __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 1), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 1), vl); dg2 = combine_c (__riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 0), __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 2), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 2), vl); db2 = combine_c (__riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 0), __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 3), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 3), vl); __riscv_vsseg4e32_v_f32m1x4 ( pd, __riscv_vcreate_v_f32m1x4 (da2, dr2, dg2, db2), vl); vl_step = vl * component_count; } } else { if (component) { for (; vn > 0; vn -= vl_step, pd += vl_step, ps += vl_step, pm += vl_step) { vl = __riscv_vsetvl_e32m1 (vn / component_count); sa_sr_sg_sb = __riscv_vlseg4e32_v_f32m1x4 (ps, vl); da_dr_dg_db = __riscv_vlseg4e32_v_f32m1x4 (pd, vl); ma_mr_mg_mb = __riscv_vlseg4e32_v_f32m1x4 (pm, vl); sr2 = __riscv_vfmul_vv_f32m1 ( __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 1), __riscv_vget_v_f32m1x4_f32m1 (ma_mr_mg_mb, 1), vl); sg2 = __riscv_vfmul_vv_f32m1 ( __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 2), __riscv_vget_v_f32m1x4_f32m1 (ma_mr_mg_mb, 2), vl); sb2 = __riscv_vfmul_vv_f32m1 ( __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 3), __riscv_vget_v_f32m1x4_f32m1 (ma_mr_mg_mb, 3), vl); ma2 = __riscv_vfmul_vv_f32m1 ( __riscv_vget_v_f32m1x4_f32m1 (ma_mr_mg_mb, 0), __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 0), vl); mr2 = __riscv_vfmul_vv_f32m1 ( __riscv_vget_v_f32m1x4_f32m1 (ma_mr_mg_mb, 1), __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 0), vl); mg2 = __riscv_vfmul_vv_f32m1 ( __riscv_vget_v_f32m1x4_f32m1 (ma_mr_mg_mb, 2), __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 0), vl); mb2 = __riscv_vfmul_vv_f32m1 ( __riscv_vget_v_f32m1x4_f32m1 (ma_mr_mg_mb, 3), __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 0), vl); da2 = combine_a ( ma2, ma2, __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), vl); dr2 = combine_c ( mr2, sr2, __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 1), vl); dg2 = combine_c ( mg2, sg2, __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 
2), vl); db2 = combine_c ( mb2, sb2, __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 3), vl); __riscv_vsseg4e32_v_f32m1x4 ( pd, __riscv_vcreate_v_f32m1x4 (da2, dr2, dg2, db2), vl); vl_step = vl * component_count; } } else { for (; vn > 0; vn -= vl_step, pd += vl_step, ps += vl_step, pm += vl_step) { vl = __riscv_vsetvl_e32m1 (vn / component_count); sa_sr_sg_sb = __riscv_vlseg4e32_v_f32m1x4 (ps, vl); da_dr_dg_db = __riscv_vlseg4e32_v_f32m1x4 (pd, vl); ma2 = __riscv_vlse32_v_f32m1 (pm, stride, vl); sa2 = __riscv_vfmul_vv_f32m1 ( ma2, __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 0), vl); sr2 = __riscv_vfmul_vv_f32m1 ( ma2, __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 1), vl); sg2 = __riscv_vfmul_vv_f32m1 ( ma2, __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 2), vl); sb2 = __riscv_vfmul_vv_f32m1 ( ma2, __riscv_vget_v_f32m1x4_f32m1 (sa_sr_sg_sb, 3), vl); ma2 = sa2; dr2 = combine_c ( ma2, sr2, __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 1), vl); dg2 = combine_c ( ma2, sg2, __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 2), vl); db2 = combine_c ( ma2, sb2, __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 3), vl); da2 = combine_a ( ma2, sa2, __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), __riscv_vget_v_f32m1x4_f32m1 (da_dr_dg_db, 0), vl); __riscv_vsseg4e32_v_f32m1x4 ( pd, __riscv_vcreate_v_f32m1x4 (da2, dr2, dg2, db2), vl); vl_step = vl * component_count; } } } } #define RVV_MAKE_COMBINER(name, component, combine_a, combine_c) \ static void rvv_combine_##name##_float ( \ pixman_implementation_t *imp, pixman_op_t op, float *dest, \ const float *src, const float *mask, int n_pixels) \ { \ rvv_combine_inner_float (component, dest, src, mask, n_pixels, \ combine_a, combine_c); \ } #define RVV_MAKE_COMBINERS(name, combine_a, combine_c) \ RVV_MAKE_COMBINER (name##_ca, TRUE, combine_a, combine_c) \ RVV_MAKE_COMBINER (name##_u, FALSE, combine_a, combine_c) static force_inline vfloat32m1_t rvv_get_factor_float (combine_factor_t factor, vfloat32m1_t sa, vfloat32m1_t da, size_t vl) { vfloat32m1_t vone = __riscv_vfmv_v_f_f32m1 (1.0f, vl); vfloat32m1_t vzero = __riscv_vfmv_v_f_f32m1 (0.0f, vl); switch (factor) { case ZERO: return vzero; case ONE: return vone; case SRC_ALPHA: return sa; case DEST_ALPHA: return da; case INV_SA: return __riscv_vfsub_vv_f32m1 (vone, sa, vl); case INV_DA: return __riscv_vfsub_vv_f32m1 (vone, da, vl); case SA_OVER_DA: return __riscv_vmerge_vvm_f32m1 ( vone, __riscv_vfmin_vv_f32m1 ( vone, __riscv_vfmax_vv_f32m1 ( vzero, __riscv_vfdiv_vv_f32m1 (sa, da, vl), vl), vl), __riscv_vmfne_vf_f32m1_b32 (da, 0.0f, vl), vl); case DA_OVER_SA: return __riscv_vmerge_vvm_f32m1 ( __riscv_vfmin_vv_f32m1 ( vone, __riscv_vfmax_vv_f32m1 ( vzero, __riscv_vfdiv_vv_f32m1 (da, sa, vl), vl), vl), vone, __riscv_vmfeq_vf_f32m1_b32 (sa, 0.0f, vl), vl); case INV_SA_OVER_DA: { vfloat32m1_t t0 = __riscv_vfdiv_vv_f32m1 ( __riscv_vfsub_vv_f32m1 (vone, sa, vl), da, vl); return __riscv_vmerge_vvm_f32m1 ( vone, __riscv_vfmin_vv_f32m1 ( vone, __riscv_vfmax_vv_f32m1 (vzero, t0, vl), vl), __riscv_vmfne_vf_f32m1_b32 (da, 0.0f, vl), vl); } case INV_DA_OVER_SA: { vfloat32m1_t t0 = __riscv_vfdiv_vv_f32m1 ( __riscv_vfsub_vv_f32m1 (vone, da, vl), sa, vl); return __riscv_vmerge_vvm_f32m1 ( vone, __riscv_vfmin_vv_f32m1 ( vone, __riscv_vfmax_vv_f32m1 (vzero, t0, vl), vl), __riscv_vmfne_vf_f32m1_b32 (sa, 0.0f, vl), vl); } case ONE_MINUS_SA_OVER_DA: { vfloat32m1_t t0 = 
__riscv_vfsub_vv_f32m1 ( vone, __riscv_vfdiv_vv_f32m1 (sa, da, vl), vl); return __riscv_vmerge_vvm_f32m1 ( vzero, __riscv_vfmin_vv_f32m1 ( vone, __riscv_vfmax_vv_f32m1 (vzero, t0, vl), vl), __riscv_vmfne_vf_f32m1_b32 (da, 0.0f, vl), vl); } case ONE_MINUS_DA_OVER_SA: { vfloat32m1_t t0 = __riscv_vfsub_vv_f32m1 ( vone, __riscv_vfdiv_vv_f32m1 (da, sa, vl), vl); return __riscv_vmerge_vvm_f32m1 ( vzero, __riscv_vfmin_vv_f32m1 ( vone, __riscv_vfmax_vv_f32m1 (vzero, t0, vl), vl), __riscv_vmfne_vf_f32m1_b32 (sa, 0.0f, vl), vl); } case ONE_MINUS_INV_DA_OVER_SA: { vbool32_t is_zero = __riscv_vmand_mm_b32 ( __riscv_vmflt_vf_f32m1_b32 (sa, FLT_MIN, vl), __riscv_vmfgt_vf_f32m1_b32 (sa, -FLT_MAX, vl), vl); vfloat32m1_t t0 = __riscv_vfsub_vv_f32m1 ( vone, __riscv_vfdiv_vv_f32m1 ( __riscv_vfsub_vv_f32m1 (vone, da, vl), sa, vl), vl); return __riscv_vmerge_vvm_f32m1 ( __riscv_vfmin_vv_f32m1 ( vone, __riscv_vfmax_vv_f32m1 (vzero, t0, vl), vl), vzero, is_zero, vl); } case ONE_MINUS_INV_SA_OVER_DA: { vfloat32m1_t t0 = __riscv_vfsub_vv_f32m1 ( vone, __riscv_vfdiv_vv_f32m1 ( __riscv_vfsub_vv_f32m1 (vone, sa, vl), da, vl), vl); return __riscv_vmerge_vvm_f32m1 ( __riscv_vfmin_vv_f32m1 ( vone, __riscv_vfmax_vv_f32m1 (vzero, t0, vl), vl), vzero, __riscv_vmfeq_vf_f32m1_b32 (da, 0.0f, vl), vl); } } return __riscv_vfmv_v_f_f32m1 (-1.0f, vl); } #define RVV_MAKE_PD_COMBINERS(name, a, b) \ static vfloat32m1_t force_inline rvv_pd_combine_##name##_float ( \ vfloat32m1_t sa, vfloat32m1_t s, vfloat32m1_t da, vfloat32m1_t d, \ size_t vl) \ { \ const vfloat32m1_t fa = rvv_get_factor_float (a, sa, da, vl); \ const vfloat32m1_t fb = rvv_get_factor_float (b, sa, da, vl); \ vfloat32m1_t t0 = __riscv_vfadd_vv_f32m1 ( \ __riscv_vfmul_vv_f32m1 (s, fa, vl), \ __riscv_vfmul_vv_f32m1 (d, fb, vl), vl); \ return __riscv_vfmin_vv_f32m1 (__riscv_vfmv_v_f_f32m1 (1.0f, vl), t0, \ vl); \ } \ \ RVV_MAKE_COMBINERS (name, rvv_pd_combine_##name##_float, \ rvv_pd_combine_##name##_float) RVV_MAKE_PD_COMBINERS (clear, ZERO, ZERO) RVV_MAKE_PD_COMBINERS (src, ONE, ZERO) RVV_MAKE_PD_COMBINERS (dst, ZERO, ONE) RVV_MAKE_PD_COMBINERS (over, ONE, INV_SA) RVV_MAKE_PD_COMBINERS (over_reverse, INV_DA, ONE) RVV_MAKE_PD_COMBINERS (in, DEST_ALPHA, ZERO) RVV_MAKE_PD_COMBINERS (in_reverse, ZERO, SRC_ALPHA) RVV_MAKE_PD_COMBINERS (out, INV_DA, ZERO) RVV_MAKE_PD_COMBINERS (out_reverse, ZERO, INV_SA) RVV_MAKE_PD_COMBINERS (atop, DEST_ALPHA, INV_SA) RVV_MAKE_PD_COMBINERS (atop_reverse, INV_DA, SRC_ALPHA) RVV_MAKE_PD_COMBINERS (xor, INV_DA, INV_SA) RVV_MAKE_PD_COMBINERS (add, ONE, ONE) RVV_MAKE_PD_COMBINERS (saturate, INV_DA_OVER_SA, ONE) RVV_MAKE_PD_COMBINERS (disjoint_clear, ZERO, ZERO) RVV_MAKE_PD_COMBINERS (disjoint_src, ONE, ZERO) RVV_MAKE_PD_COMBINERS (disjoint_dst, ZERO, ONE) RVV_MAKE_PD_COMBINERS (disjoint_over, ONE, INV_SA_OVER_DA) RVV_MAKE_PD_COMBINERS (disjoint_over_reverse, INV_DA_OVER_SA, ONE) RVV_MAKE_PD_COMBINERS (disjoint_in, ONE_MINUS_INV_DA_OVER_SA, ZERO) RVV_MAKE_PD_COMBINERS (disjoint_in_reverse, ZERO, ONE_MINUS_INV_SA_OVER_DA) RVV_MAKE_PD_COMBINERS (disjoint_out, INV_DA_OVER_SA, ZERO) RVV_MAKE_PD_COMBINERS (disjoint_out_reverse, ZERO, INV_SA_OVER_DA) RVV_MAKE_PD_COMBINERS (disjoint_atop, ONE_MINUS_INV_DA_OVER_SA, INV_SA_OVER_DA) RVV_MAKE_PD_COMBINERS (disjoint_atop_reverse, INV_DA_OVER_SA, ONE_MINUS_INV_SA_OVER_DA) RVV_MAKE_PD_COMBINERS (disjoint_xor, INV_DA_OVER_SA, INV_SA_OVER_DA) RVV_MAKE_PD_COMBINERS (conjoint_clear, ZERO, ZERO) RVV_MAKE_PD_COMBINERS (conjoint_src, ONE, ZERO) RVV_MAKE_PD_COMBINERS (conjoint_dst, ZERO, ONE) RVV_MAKE_PD_COMBINERS 
(conjoint_over, ONE, ONE_MINUS_SA_OVER_DA) RVV_MAKE_PD_COMBINERS (conjoint_over_reverse, ONE_MINUS_DA_OVER_SA, ONE) RVV_MAKE_PD_COMBINERS (conjoint_in, DA_OVER_SA, ZERO) RVV_MAKE_PD_COMBINERS (conjoint_in_reverse, ZERO, SA_OVER_DA) RVV_MAKE_PD_COMBINERS (conjoint_out, ONE_MINUS_DA_OVER_SA, ZERO) RVV_MAKE_PD_COMBINERS (conjoint_out_reverse, ZERO, ONE_MINUS_SA_OVER_DA) RVV_MAKE_PD_COMBINERS (conjoint_atop, DA_OVER_SA, ONE_MINUS_SA_OVER_DA) RVV_MAKE_PD_COMBINERS (conjoint_atop_reverse, ONE_MINUS_DA_OVER_SA, SA_OVER_DA) RVV_MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA) #define RVV_MAKE_SEPARABLE_PDF_COMBINERS(name) \ static force_inline vfloat32m1_t rvv_combine_##name##_a ( \ vfloat32m1_t sa, vfloat32m1_t s, vfloat32m1_t da, vfloat32m1_t d, \ size_t vl) \ { \ return __riscv_vfsub_vv_f32m1 (__riscv_vfadd_vv_f32m1 (da, sa, vl), \ __riscv_vfmul_vv_f32m1 (da, sa, vl), \ vl); \ } \ \ static force_inline vfloat32m1_t rvv_combine_##name##_c ( \ vfloat32m1_t sa, vfloat32m1_t s, vfloat32m1_t da, vfloat32m1_t d, \ size_t vl) \ { \ vfloat32m1_t f = __riscv_vfmul_vf_f32m1 ( \ __riscv_vfadd_vv_f32m1 ( \ __riscv_vfmul_vv_f32m1 (__riscv_vfsub_vf_f32m1 (sa, 1.0f, vl), \ d, vl), \ __riscv_vfmul_vv_f32m1 (__riscv_vfsub_vf_f32m1 (da, 1.0f, vl), \ s, vl), \ vl), \ -1.0f, vl); \ \ return __riscv_vfadd_vv_f32m1 ( \ f, rvv_blend_##name##_float (sa, s, da, d, vl), vl); \ } \ \ RVV_MAKE_COMBINERS (name, rvv_combine_##name##_a, rvv_combine_##name##_c) RVV_MAKE_SEPARABLE_PDF_COMBINERS (multiply) RVV_MAKE_SEPARABLE_PDF_COMBINERS (screen) RVV_MAKE_SEPARABLE_PDF_COMBINERS (overlay) RVV_MAKE_SEPARABLE_PDF_COMBINERS (darken) RVV_MAKE_SEPARABLE_PDF_COMBINERS (lighten) RVV_MAKE_SEPARABLE_PDF_COMBINERS (color_dodge) RVV_MAKE_SEPARABLE_PDF_COMBINERS (color_burn) RVV_MAKE_SEPARABLE_PDF_COMBINERS (hard_light) RVV_MAKE_SEPARABLE_PDF_COMBINERS (soft_light) RVV_MAKE_SEPARABLE_PDF_COMBINERS (difference) RVV_MAKE_SEPARABLE_PDF_COMBINERS (exclusion) // int implementation // pixman-combine32.h RVV implementation plus some convenience functions { /* * x_c = min(x_c + y_c, 255) */ #define rvv_UN8_ADD_UN8_vv(x, y, vl) __riscv_vsaddu (x, y, vl) #define rvv_UN8x4_ADD_UN8x4_vv_m4(x, y, vl) \ RVV_U8x4_U32_m4 (rvv_UN8_ADD_UN8_vv (RVV_U32_U8x4_m4 (x), \ RVV_U32_U8x4_m4 (y), (vl) * 4)) /* * x_c = (x_c * a_c) / 255 */ #define __rvv_UN8_MUL_UN8_vv(LMUL, LMUL16) \ static force_inline VUINT8 (LMUL) rvv_UN8_MUL_UN8_vv_##LMUL ( \ const VUINT8 (LMUL) x, const VUINT8 (LMUL) a, size_t vl) \ { \ VUINT16 (LMUL16) \ mul_higher = __riscv_vwmaccu ( \ __riscv_vmv_v_x_u16##LMUL16 (ONE_HALF, vl), x, a, vl); \ \ VUINT16 (LMUL16) \ mul_lower = __riscv_vsrl (mul_higher, G_SHIFT, vl); \ \ return __riscv_vnsrl (__riscv_vadd (mul_higher, mul_lower, vl), \ G_SHIFT, vl); \ } __rvv_UN8_MUL_UN8_vv (m1, m2); __rvv_UN8_MUL_UN8_vv (m2, m4); __rvv_UN8_MUL_UN8_vv (m4, m8); static force_inline vuint8m4_t rvv_UN8_MUL_UN8_vx_m4 (const vuint8m4_t x, const uint8_t a, size_t vl) { vuint16m8_t mul_higher = __riscv_vwmaccu ( __riscv_vmv_v_x_u16m8 (ONE_HALF, vl), a, x, vl); vuint16m8_t mul_lower = __riscv_vsrl (mul_higher, G_SHIFT, vl); return __riscv_vnsrl (__riscv_vadd (mul_higher, mul_lower, vl), G_SHIFT, vl); } #define __rvv_UN8x4_MUL_UN8x4_vv(LMUL, x, a, vl) \ RVV_U8x4_U32 (LMUL, rvv_UN8_MUL_UN8_vv_##LMUL (RVV_U32_U8x4 (LMUL, x), \ RVV_U32_U8x4 (LMUL, a), \ (vl) * 4)) #define rvv_UN8x4_MUL_UN8x4_vv_m2(x, a, vl) \ __rvv_UN8x4_MUL_UN8x4_vv (m2, x, a, vl) #define rvv_UN8x4_MUL_UN8x4_vv_m4(x, a, vl) \ __rvv_UN8x4_MUL_UN8x4_vv (m4, x, a, vl) /* * a_c = a 
(broadcast to all components) */ #define __rvv_UN16_bcast_UN8x4_v(LMUL, LMUL16) \ static force_inline VUINT32 (LMUL) \ rvv_UN16_bcast_UN8x4_v_##LMUL (const VUINT16 (LMUL16) a, size_t vl) \ { \ VUINT32 (LMUL) \ a32 = __riscv_vwcvtu_x (__riscv_vmadd (a, 1 << 8, a, vl), vl); \ \ return __riscv_vmadd (a32, 1 << 16, a32, vl); \ } __rvv_UN16_bcast_UN8x4_v (m2, m1); __rvv_UN16_bcast_UN8x4_v (m4, m2); #define rvv_UN8_bcast_UN8x4_v_m4(a, vl) \ rvv_UN16_bcast_UN8x4_v_m4 (__riscv_vwcvtu_x (a, vl), vl) /* * x_c = (x_c * a) / 255 */ #define rvv_UN8x4_MUL_UN8_vv_m4(x, a, vl) \ rvv_UN8x4_MUL_UN8x4_vv_m4 (x, rvv_UN8_bcast_UN8x4_v_m4 (a, vl), vl) #define __rvv_UN8x4_MUL_UN16_vv(LMUL, x, a, vl) \ rvv_UN8x4_MUL_UN8x4_vv_##LMUL (x, rvv_UN16_bcast_UN8x4_v_##LMUL (a, vl), vl) #define rvv_UN8x4_MUL_UN16_vv_m2(x, a, vl) \ __rvv_UN8x4_MUL_UN16_vv (m2, x, a, vl) #define rvv_UN8x4_MUL_UN16_vv_m4(x, a, vl) \ __rvv_UN8x4_MUL_UN16_vv (m4, x, a, vl) #define rvv_UN8x4_MUL_UN8_vx_m4(x, a, vl) \ RVV_U8x4_U32_m4 (rvv_UN8_MUL_UN8_vx_m4 (RVV_U32_U8x4_m4 (x), a, (vl) * 4)) static force_inline vuint32m2_t rvv_DIV_ONE_UN32m2_UN32m2_v (const vuint32m2_t x, size_t vl) { vuint32m2_t mul_higher = __riscv_vadd (x, ONE_HALF, vl); vuint32m2_t mul_lower = __riscv_vsrl (mul_higher, G_SHIFT, vl); return __riscv_vsrl (__riscv_vadd (mul_higher, mul_lower, vl), G_SHIFT, vl); } static force_inline vuint8m2_t rvv_DIV_ONE_UN32m8_UN8m2_v (const vuint32m8_t x, size_t vl) { vuint32m8_t mul_higher = __riscv_vadd (x, ONE_HALF, vl); vuint32m8_t mul_lower = __riscv_vsrl (mul_higher, G_SHIFT, vl); return __riscv_vncvt_x ( __riscv_vnsrl (__riscv_vadd (mul_higher, mul_lower, vl), G_SHIFT, vl), vl); } /* * x_c = (x_c * a) / 255 + y_c */ #define rvv_UN8x4_MUL_UN16_ADD_UN8x4_vvv_m4(x, a, y, vl) \ rvv_UN8x4_ADD_UN8x4_vv_m4 (rvv_UN8x4_MUL_UN16_vv_m4 (x, a, vl), y, vl) /* * x_c = (x_c * a + y_c * b) / 255 */ #define rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4(x, a, y, b, vl) \ rvv_UN8x4_ADD_UN8x4_vv_m4 (rvv_UN8x4_MUL_UN16_vv_m4 (x, a, vl), \ rvv_UN8x4_MUL_UN16_vv_m4 (y, b, vl), vl) /* * x_c = (x_c * a_c) / 255 + y_c */ #define rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_vvv_m4(x, a, y, vl) \ rvv_UN8x4_ADD_UN8x4_vv_m4 (rvv_UN8x4_MUL_UN8x4_vv_m4 (x, a, vl), y, vl) /* * x_c = (x_c * a_c + y_c * b) / 255 */ #define rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN16_vvvv_m4(x, a, y, b, vl) \ rvv_UN8x4_ADD_UN8x4_vv_m4 (rvv_UN8x4_MUL_UN8x4_vv_m4 (x, a, vl), \ rvv_UN8x4_MUL_UN16_vv_m4 (y, b, vl), vl) // } pixman-combine32.h // Additional functions. 
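/*
 * All of the 8-bit arithmetic above uses the usual pixman trick for a
 * rounded x * a / 255: add ONE_HALF (0x80), fold the high byte back in and
 * shift right by G_SHIFT (8).  Per lane, the vector code computes the
 * equivalent of this scalar sketch (illustrative only, not part of the
 * build; the name un8_mul_un8 is ours):
 *
 *     static inline uint8_t
 *     un8_mul_un8 (uint8_t x, uint8_t a)
 *     {
 *         uint16_t t = (uint16_t)x * a + ONE_HALF;
 *         return (uint8_t)((t + (t >> G_SHIFT)) >> G_SHIFT);
 *     }
 *
 * The helpers below pick the alpha byte out of packed a8r8g8b8 pixels,
 * either by a narrowing shift of an already-loaded u32 vector (alpha lives
 * in bits 31:24) or with a strided byte load straight from memory, plus
 * inverted (255 - alpha) variants of both.
 */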
#define rvv_shift_alpha_u16(x, vl) __riscv_vnsrl (x, 24, vl) #define rvv_shift_not_alpha_u16(x, vl) \ rvv_shift_alpha_u16 (__riscv_vnot (x, vl), vl) #define rvv_load_alpha_u8m1(src, vl) \ __riscv_vlse8_v_u8m1 ((uint8_t *)src + 3, 4, vl) #define rvv_load_not_alpha_u8m1(src, vl) \ __riscv_vnot (rvv_load_alpha_u8m1 (src, vl), vl) #define rvv_u8m2_to_i16m4(in, vl) \ __riscv_vreinterpret_i16m4 (__riscv_vwcvtu_x (in, vl)) #define rvv_over_m4(src, dest, vl) \ rvv_UN8x4_MUL_UN16_ADD_UN8x4_vvv_m4 ( \ dest, rvv_shift_not_alpha_u16 (src, vl), src, vl) #define rvv_in_m4(x, y, vl) rvv_UN8x4_MUL_UN8_vv_m4 (x, y, vl) #define rvv_in_load_s_m_m4(src, mask, vl) \ rvv_in_m4 (__riscv_vle32_v_u32m4 (src, vl), \ rvv_load_alpha_u8m1 (mask, vl), vl) #define rvv_in_load_s_nm_m4(src, mask, vl) \ rvv_in_m4 (__riscv_vle32_v_u32m4 (src, vl), \ rvv_load_not_alpha_u8m1 (mask, vl), vl) static force_inline vuint16m2_t rvv_convert_8888_to_0565_m2 (const vuint32m4_t s, size_t vl) { vuint32m4_t rb = __riscv_vand (s, 0xF800F8, vl); return __riscv_vor ( __riscv_vor (__riscv_vnsrl (rb, 3, vl), __riscv_vnsrl (rb, 8, vl), vl), __riscv_vand (__riscv_vnsrl (s, 5, vl), 0x7E0, vl), vl); } static force_inline vuint32m4_t rvv_convert_0565_to_0888_m4 (const vuint16m2_t s, size_t vl) { vuint8m1_t g1, g2; vuint16m2_t r, g_w, b; vuint32m4_t r_w, rb_w; r = __riscv_vand (s, 0xF800, vl); b = __riscv_vand (s, 0x001F, vl); r_w = __riscv_vwmulu (r, 1 << 8, vl); rb_w = __riscv_vwmaccu (r_w, 1 << 3, b, vl); rb_w = __riscv_vand (__riscv_vor (rb_w, __riscv_vsrl (rb_w, 5, vl), vl), 0xFF00FF, vl); g1 = __riscv_vsll (__riscv_vnsrl (s, 5, vl), 2, vl); g2 = __riscv_vsrl (g1, 6, vl); g_w = __riscv_vwaddu_vv (g1, g2, vl); return __riscv_vwmaccu (rb_w, 1 << 8, g_w, vl); } #define rvv_convert_0565_to_8888_m4(s, vl) \ __riscv_vor (rvv_convert_0565_to_0888_m4 (s, vl), 0xff000000, vl) #define __rvv_combine_mask_value_ca(LMUL, src, mask, vl) \ rvv_UN8x4_MUL_UN8x4_vv_##LMUL (src, mask, vl) #define rvv_combine_mask_value_ca_m2(src, mask, vl) \ __rvv_combine_mask_value_ca (m2, src, mask, vl) #define rvv_combine_mask_value_ca_m4(src, mask, vl) \ __rvv_combine_mask_value_ca (m4, src, mask, vl) #define __rvv_combine_mask_alpha_ca(LMUL, src, mask, vl) \ rvv_UN8x4_MUL_UN16_vv_##LMUL (mask, rvv_shift_alpha_u16 (src, vl), vl) #define rvv_combine_mask_alpha_ca_m2(src, mask, vl) \ __rvv_combine_mask_alpha_ca (m2, src, mask, vl) #define rvv_combine_mask_alpha_ca_m4(src, mask, vl) \ __rvv_combine_mask_alpha_ca (m4, src, mask, vl) #define __rvv_combine_mask(LMUL, src, mask, vl) \ rvv_UN8x4_MUL_UN16_vv_##LMUL (src, rvv_shift_alpha_u16 (mask, vl), vl) #define rvv_combine_mask_m2(src, mask, vl) \ __rvv_combine_mask (m2, src, mask, vl) #define rvv_combine_mask_m4(src, mask, vl) \ __rvv_combine_mask (m4, src, mask, vl) #define __rvv_combine_mask_ca(LMUL) \ static force_inline void rvv_combine_mask_ca_##LMUL ( \ VUINT32 (LMUL) *__restrict__ src, VUINT32 (LMUL) *__restrict__ mask, \ size_t vl) \ { \ VUINT32 (LMUL) src_cpy = *src; \ *(src) = rvv_combine_mask_value_ca_##LMUL (*(src), *(mask), vl); \ *(mask) = rvv_combine_mask_alpha_ca_##LMUL (src_cpy, *(mask), vl); \ } __rvv_combine_mask_ca (m2); __rvv_combine_mask_ca (m4); static void rvv_combine_clear (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *pd = dest; vuint32m8_t v = __riscv_vmv_v_x_u32m8 (0, __riscv_vsetvlmax_e32m8 ()); RVV_FOREACH_1 (width, vl, e32m8, pd) { __riscv_vse32 (pd, v, vl); } } 
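/*
 * The *_u ("unified alpha") combiners below all follow the same shape:
 * strip-mine the row with RVV_FOREACH_*, load a chunk of source pixels,
 * optionally multiply them by the mask's alpha channel (rvv_in_load_s_m_m4),
 * apply the Porter-Duff operator and store the result.  For reference, the
 * rvv_over_m4 (src, dest) operator used by the OVER paths computes, per
 * channel c of a premultiplied a8r8g8b8 pixel (scalar sketch only;
 * sat_add_un8 and un8_mul_un8 stand for the saturating add and the rounded
 * multiply used above):
 *
 *     dest_c = sat_add_un8 (src_c, un8_mul_un8 (dest_c, 255 - src_alpha));
 *
 * i.e. the destination is scaled by the inverted source alpha broadcast to
 * all four channels and then added to the source with unsigned saturation.
 */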
static void rvv_combine_src_u (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 (pd, rvv_in_load_s_m_m4 (ps, pm, vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m8, ps, pd) { __riscv_vse32 (pd, __riscv_vle32_v_u32m8 (ps, vl), vl); } } } static void rvv_combine_over_u (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 (pd, rvv_over_m4 (rvv_in_load_s_m_m4 (ps, pm, vl), __riscv_vle32_v_u32m4 (pd, vl), vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m4, ps, pd) { __riscv_vse32 (pd, rvv_over_m4 (__riscv_vle32_v_u32m4 (ps, vl), __riscv_vle32_v_u32m4 (pd, vl), vl), vl); } } } static void rvv_combine_over_reverse_u (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 (pd, rvv_over_m4 (__riscv_vle32_v_u32m4 (pd, vl), rvv_in_load_s_m_m4 (ps, pm, vl), vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m4, ps, pd) { __riscv_vse32 (pd, rvv_over_m4 (__riscv_vle32_v_u32m4 (pd, vl), __riscv_vle32_v_u32m4 (ps, vl), vl), vl); } } } static void rvv_combine_in_u (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 (pd, rvv_in_m4 (rvv_in_load_s_m_m4 (ps, pm, vl), rvv_load_alpha_u8m1 (pd, vl), vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m4, ps, pd) { __riscv_vse32 (pd, rvv_in_load_s_m_m4 (ps, pd, vl), vl); } } } static void rvv_combine_in_reverse_u (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 (pd, rvv_in_m4 (__riscv_vle32_v_u32m4 (pd, vl), rvv_UN8_MUL_UN8_vv_m1 ( rvv_load_alpha_u8m1 (ps, vl), rvv_load_alpha_u8m1 (pm, vl), vl), vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m4, ps, pd) { __riscv_vse32 (pd, rvv_in_load_s_m_m4 (pd, ps, vl), vl); } } } static void rvv_combine_out_u (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 (pd, rvv_in_m4 (rvv_in_load_s_m_m4 (ps, pm, vl), 
rvv_load_not_alpha_u8m1 (pd, vl), vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m4, ps, pd) { __riscv_vse32 (pd, rvv_in_load_s_nm_m4 (ps, pd, vl), vl); } } } static void rvv_combine_out_reverse_u (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 ( pd, rvv_in_m4 (__riscv_vle32_v_u32m4 (pd, vl), __riscv_vnot (rvv_UN8_MUL_UN8_vv_m1 ( rvv_load_alpha_u8m1 (ps, vl), rvv_load_alpha_u8m1 (pm, vl), vl), vl), vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m4, ps, pd) { __riscv_vse32 (pd, rvv_in_load_s_nm_m4 (pd, ps, vl), vl); } } } static void rvv_combine_atop_u (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; vuint32m4_t s, d; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { s = rvv_in_load_s_m_m4 (ps, pm, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( s, rvv_shift_alpha_u16 (d, vl), d, rvv_shift_not_alpha_u16 (s, vl), vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m4, ps, pd) { s = __riscv_vle32_v_u32m4 (ps, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( s, rvv_shift_alpha_u16 (d, vl), d, rvv_shift_not_alpha_u16 (s, vl), vl), vl); } } } static void rvv_combine_atop_reverse_u (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; vuint32m4_t s, d; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { s = rvv_in_load_s_m_m4 (ps, pm, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( s, rvv_shift_not_alpha_u16 (d, vl), d, rvv_shift_alpha_u16 (s, vl), vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m4, ps, pd) { s = __riscv_vle32_v_u32m4 (ps, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( s, rvv_shift_not_alpha_u16 (d, vl), d, rvv_shift_alpha_u16 (s, vl), vl), vl); } } } static void rvv_combine_xor_u (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; vuint32m4_t s, d; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { s = rvv_in_load_s_m_m4 (ps, pm, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( s, rvv_shift_not_alpha_u16 (d, vl), d, rvv_shift_not_alpha_u16 (s, vl), vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m4, ps, pd) { s = __riscv_vle32_v_u32m4 (ps, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( s, rvv_shift_not_alpha_u16 (d, vl), d, rvv_shift_not_alpha_u16 (s, vl), vl), vl); } } } static void rvv_combine_add_u 
(pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 ( pd, rvv_UN8x4_ADD_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (pd, vl), rvv_in_load_s_m_m4 (ps, pm, vl), vl), vl); } } else { RVV_FOREACH_2 (width, vl, e32m4, ps, pd) { __riscv_vse32 ( pd, rvv_UN8x4_ADD_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (pd, vl), __riscv_vle32_v_u32m4 (ps, vl), vl), vl); } } } /* * Multiply * * ad * as * B(d / ad, s / as) * = ad * as * d/ad * s/as * = d * s * */ static void rvv_combine_multiply_u (pixman_implementation_t *imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; vuint32m4_t s, d; if (mask) { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { s = rvv_in_load_s_m_m4 (ps, pm, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_ADD_UN8x4_vv_m4 ( rvv_UN8x4_MUL_UN8x4_vv_m4 (d, s, vl), rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( s, rvv_shift_not_alpha_u16 (d, vl), d, rvv_shift_not_alpha_u16 (s, vl), vl), vl), vl); } } else { RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { s = __riscv_vle32_v_u32m4 (ps, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_ADD_UN8x4_vv_m4 ( rvv_UN8x4_MUL_UN8x4_vv_m4 (d, s, vl), rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( s, rvv_shift_not_alpha_u16 (d, vl), d, rvv_shift_not_alpha_u16 (s, vl), vl), vl), vl); } } } static void rvv_combine_multiply_ca (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; vuint32m4_t s, m, d; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { s = __riscv_vle32_v_u32m4 (ps, vl); m = __riscv_vle32_v_u32m4 (pm, vl); rvv_combine_mask_ca_m4 (&s, &m, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_ADD_UN8x4_vv_m4 ( rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN16_vvvv_m4 ( d, __riscv_vnot (m, vl), s, rvv_shift_not_alpha_u16 (d, vl), vl), rvv_UN8x4_MUL_UN8x4_vv_m4 (d, s, vl), vl), vl); } } #define PDF_SEPARABLE_BLEND_MODE(name) \ static void rvv_combine_##name##_u ( \ pixman_implementation_t *imp, pixman_op_t op, uint32_t *dest, \ const uint32_t *src, const uint32_t *mask, int width) \ { \ uint32_t *__restrict__ pd = dest; \ const uint32_t *__restrict__ ps = src; \ const uint32_t *__restrict__ pm = mask; \ \ vuint32m2_t s, d, ra, rx; \ vuint16m1_t da, sa; \ size_t vl4; \ vuint8m2_t s4, d4, sa4, isa4, da4, ida4; \ vuint32m8_t rx4; \ \ RVV_FOREACH_3 (width, vl, e32m2, ps, pm, pd) \ { \ vl4 = vl * 4; \ \ s = __riscv_vle32_v_u32m2 (ps, vl); \ if (mask) \ s = rvv_combine_mask_m2 (s, __riscv_vle32_v_u32m2 (pm, vl), \ vl); \ sa = rvv_shift_alpha_u16 (s, vl); \ \ d = __riscv_vle32_v_u32m2 (pd, vl); \ da = rvv_shift_alpha_u16 (d, vl); \ \ ra = __riscv_vsub (__riscv_vwaddu_vv (__riscv_vmul (da, 0xFF, vl), \ __riscv_vmul (sa, 0xFF, vl), \ vl), \ __riscv_vwmulu (sa, da, vl), vl); \ \ s4 = RVV_U32_U8x4_m2 (s); \ sa4 = RVV_U32_U8x4_m2 (rvv_UN16_bcast_UN8x4_v_m2 (sa, vl)); \ isa4 = __riscv_vnot (sa4, vl4); \ d4 = 
RVV_U32_U8x4_m2 (d); \ da4 = RVV_U32_U8x4_m2 (rvv_UN16_bcast_UN8x4_v_m2 (da, vl)); \ ida4 = __riscv_vnot (da4, vl4); \ \ rx4 = __riscv_vadd ( \ __riscv_vwaddu_vv (__riscv_vwmulu (isa4, d4, vl4), \ __riscv_vwmulu (ida4, s4, vl4), vl4), \ rvv_blend_##name##_int (d4, da4, s4, sa4, vl4), vl4); \ \ ra = __riscv_vminu (ra, 255 * 255, vl); \ rx4 = __riscv_vminu (rx4, 255 * 255, vl4); \ \ ra = rvv_DIV_ONE_UN32m2_UN32m2_v (ra, vl); \ rx = RVV_U8x4_U32_m2 (rvv_DIV_ONE_UN32m8_UN8m2_v (rx4, vl4)); \ \ __riscv_vse32 (pd, \ __riscv_vor (__riscv_vsll (ra, 24, vl), \ __riscv_vand (rx, 0x00FFFFFF, vl), \ vl), \ vl); \ } \ } \ \ static void rvv_combine_##name##_ca ( \ pixman_implementation_t *imp, pixman_op_t op, uint32_t *dest, \ const uint32_t *src, const uint32_t *mask, int width) \ { \ uint32_t *__restrict__ pd = dest; \ const uint32_t *__restrict__ ps = src; \ const uint32_t *__restrict__ pm = mask; \ \ vuint32m2_t s, m, d, ra, rx; \ vuint16m1_t da, sa; \ size_t vl4; \ vuint8m2_t s4, m4, d4, ixa4, da4, ida4; \ vuint32m8_t rx4; \ \ RVV_FOREACH_3 (width, vl, e32m2, ps, pm, pd) \ { \ m = __riscv_vle32_v_u32m2 (pm, vl); \ s = __riscv_vle32_v_u32m2 (ps, vl); \ rvv_combine_mask_ca_m2 (&s, &m, vl); \ sa = rvv_shift_alpha_u16 (s, vl); \ \ d = __riscv_vle32_v_u32m2 (pd, vl); \ da = rvv_shift_alpha_u16 (d, vl); \ \ ra = __riscv_vsub (__riscv_vwaddu_vv (__riscv_vmul (da, 0xFF, vl), \ __riscv_vmul (sa, 0xFF, vl), \ vl), \ __riscv_vwmulu (sa, da, vl), vl); \ \ ixa4 = RVV_U32_U8x4_m2 (__riscv_vnot (m, vl)); \ d4 = RVV_U32_U8x4_m2 (d); \ ida4 = RVV_U32_U8x4_m2 ( \ __riscv_vnot (rvv_UN16_bcast_UN8x4_v_m2 (da, vl), vl)); \ s4 = RVV_U32_U8x4_m2 (s); \ da4 = RVV_U32_U8x4_m2 (rvv_UN16_bcast_UN8x4_v_m2 (da, vl)); \ m4 = RVV_U32_U8x4_m2 (m); \ \ vl4 = vl * 4; \ rx4 = __riscv_vadd ( \ __riscv_vwaddu_vv (__riscv_vwmulu (ixa4, d4, vl4), \ __riscv_vwmulu (ida4, s4, vl4), vl4), \ rvv_blend_##name##_int (d4, da4, s4, m4, vl4), vl4); \ \ ra = __riscv_vminu (ra, 255 * 255, vl); \ rx4 = __riscv_vminu (rx4, 255 * 255, vl4); \ \ ra = rvv_DIV_ONE_UN32m2_UN32m2_v (ra, vl); \ rx = RVV_U8x4_U32_m2 (rvv_DIV_ONE_UN32m8_UN8m2_v (rx4, vl4)); \ \ __riscv_vse32 (pd, \ __riscv_vor (__riscv_vsll (ra, 24, vl), \ __riscv_vand (rx, 0x00FFFFFF, vl), \ vl), \ vl); \ } \ } static force_inline vuint32m8_t rvv_blend_screen_int (const vuint8m2_t d, const vuint8m2_t ad, const vuint8m2_t s, const vuint8m2_t as, size_t vl) { return __riscv_vsub (__riscv_vwaddu_vv (__riscv_vwmulu (s, ad, vl), __riscv_vwmulu (d, as, vl), vl), __riscv_vwcvtu_x (__riscv_vwmulu (s, d, vl), vl), vl); } PDF_SEPARABLE_BLEND_MODE (screen) static force_inline vuint32m8_t _rvv_blend_overlay_hard_light (const vuint8m2_t d, const vuint8m2_t ad, const vuint8m2_t s, const vuint8m2_t as, const vbool4_t selector, size_t vl) { vuint32m8_t out_true = __riscv_vwmulu (__riscv_vwmulu (s, d, vl), 2, vl); vint16m4_t d_i = rvv_u8m2_to_i16m4 (d, vl); vint16m4_t ad_i = rvv_u8m2_to_i16m4 (ad, vl); vint16m4_t s_i = rvv_u8m2_to_i16m4 (s, vl); vint16m4_t as_i = rvv_u8m2_to_i16m4 (as, vl); vuint32m8_t out_false = __riscv_vreinterpret_v_i32m8_u32m8 (__riscv_vsub ( __riscv_vwmul (as_i, ad_i, vl), __riscv_vsll (__riscv_vwmul (__riscv_vsub (ad_i, d_i, vl), __riscv_vsub (as_i, s_i, vl), vl), 1, vl), vl)); return __riscv_vmerge (out_false, out_true, selector, vl); } static force_inline vuint32m8_t rvv_blend_overlay_int (const vuint8m2_t d, const vuint8m2_t ad, const vuint8m2_t s, const vuint8m2_t as, size_t vl) { return _rvv_blend_overlay_hard_light ( d, ad, s, as, __riscv_vmsltu (__riscv_vwmulu (d, 2, vl), 
__riscv_vwcvtu_x (ad, vl), vl), vl); } PDF_SEPARABLE_BLEND_MODE (overlay) static force_inline vuint32m8_t rvv_blend_darken_int (const vuint8m2_t d, const vuint8m2_t ad, const vuint8m2_t s, const vuint8m2_t as, size_t vl) { return __riscv_vwcvtu_x (__riscv_vminu (__riscv_vwmulu (ad, s, vl), __riscv_vwmulu (as, d, vl), vl), vl); } PDF_SEPARABLE_BLEND_MODE (darken) static force_inline vuint32m8_t rvv_blend_lighten_int (const vuint8m2_t d, const vuint8m2_t ad, const vuint8m2_t s, const vuint8m2_t as, size_t vl) { return __riscv_vwcvtu_x (__riscv_vmaxu (__riscv_vwmulu (as, d, vl), __riscv_vwmulu (ad, s, vl), vl), vl); } PDF_SEPARABLE_BLEND_MODE (lighten) static force_inline vuint32m8_t rvv_blend_hard_light_int (const vuint8m2_t d, const vuint8m2_t ad, const vuint8m2_t s, const vuint8m2_t as, size_t vl) { return _rvv_blend_overlay_hard_light ( d, ad, s, as, __riscv_vmsltu (__riscv_vwmulu (s, 2, vl), __riscv_vwcvtu_x (as, vl), vl), vl); } PDF_SEPARABLE_BLEND_MODE (hard_light) static force_inline vuint32m8_t rvv_blend_difference_int (const vuint8m2_t d, const vuint8m2_t ad, const vuint8m2_t s, const vuint8m2_t as, size_t vl) { vuint16m4_t das = __riscv_vwmulu (d, as, vl); vuint16m4_t sad = __riscv_vwmulu (s, ad, vl); return __riscv_vmerge (__riscv_vwsubu_vv (sad, das, vl), __riscv_vwsubu_vv (das, sad, vl), __riscv_vmsltu (sad, das, vl), vl); } PDF_SEPARABLE_BLEND_MODE (difference) static force_inline vuint32m8_t rvv_blend_exclusion_int (const vuint8m2_t d, const vuint8m2_t ad, const vuint8m2_t s, const vuint8m2_t as, size_t vl) { return __riscv_vsub (__riscv_vwaddu_vv (__riscv_vwmulu (s, ad, vl), __riscv_vwmulu (d, as, vl), vl), __riscv_vwmulu (__riscv_vwmulu (d, s, vl), 2, vl), vl); } PDF_SEPARABLE_BLEND_MODE (exclusion) #undef PDF_SEPARABLE_BLEND_MODE static void rvv_combine_over_ca (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; vuint32m4_t s, m; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { s = __riscv_vle32_v_u32m4 (ps, vl); m = __riscv_vle32_v_u32m4 (pm, vl); rvv_combine_mask_ca_m4 (&s, &m, vl); __riscv_vse32 ( pd, rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_vvv_m4 ( __riscv_vle32_v_u32m4 (pd, vl), __riscv_vnot (m, vl), s, vl), vl); } } static void rvv_combine_over_reverse_ca (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; vuint32m4_t d; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 ( pd, rvv_UN8x4_MUL_UN16_ADD_UN8x4_vvv_m4 ( rvv_UN8x4_MUL_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (ps, vl), __riscv_vle32_v_u32m4 (pm, vl), vl), rvv_shift_not_alpha_u16 (d, vl), d, vl), vl); } } static void rvv_combine_atop_ca (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; vuint32m4_t d, s, m; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { s = __riscv_vle32_v_u32m4 (ps, vl); m = __riscv_vle32_v_u32m4 (pm, vl); rvv_combine_mask_ca_m4 (&s, &m, vl); d = __riscv_vle32_v_u32m4 (pd, vl); 
__riscv_vse32 ( pd, rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN16_vvvv_m4 ( d, __riscv_vnot (m, vl), s, rvv_shift_alpha_u16 (d, vl), vl), vl); } } static void rvv_combine_xor_ca (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; vuint32m4_t d, s, m; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { s = __riscv_vle32_v_u32m4 (ps, vl); m = __riscv_vle32_v_u32m4 (pm, vl); rvv_combine_mask_ca_m4 (&s, &m, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN16_vvvv_m4 ( d, __riscv_vnot (m, vl), s, rvv_shift_not_alpha_u16 (d, vl), vl), vl); } } static void rvv_combine_atop_reverse_ca (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; vuint32m4_t d, s, m; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { s = __riscv_vle32_v_u32m4 (ps, vl); m = __riscv_vle32_v_u32m4 (pm, vl); rvv_combine_mask_ca_m4 (&s, &m, vl); d = __riscv_vle32_v_u32m4 (pd, vl); __riscv_vse32 (pd, rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN16_vvvv_m4 ( d, m, s, rvv_shift_not_alpha_u16 (d, vl), vl), vl); } } static void rvv_combine_src_ca (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 ( pd, rvv_combine_mask_value_ca_m4 (__riscv_vle32_v_u32m4 (ps, vl), __riscv_vle32_v_u32m4 (pm, vl), vl), vl); } } static void rvv_combine_in_ca (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 (pd, rvv_in_m4 (rvv_combine_mask_value_ca_m4 ( __riscv_vle32_v_u32m4 (ps, vl), __riscv_vle32_v_u32m4 (pm, vl), vl), rvv_load_alpha_u8m1 (pd, vl), vl), vl); } } static void rvv_combine_in_reverse_ca (pixman_implementation_t *imp, pixman_op_t op, uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 ( pd, rvv_UN8x4_MUL_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (pd, vl), rvv_combine_mask_alpha_ca_m4 ( __riscv_vle32_v_u32m4 (ps, vl), __riscv_vle32_v_u32m4 (pm, vl), vl), vl), vl); } } static void rvv_combine_out_ca (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 (pd, rvv_in_m4 (rvv_combine_mask_value_ca_m4 ( __riscv_vle32_v_u32m4 (ps, vl), __riscv_vle32_v_u32m4 (pm, vl), vl), rvv_load_not_alpha_u8m1 (pd, vl), vl), vl); } } static 
void rvv_combine_out_reverse_ca (pixman_implementation_t *imp, pixman_op_t op, uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 ( pd, rvv_UN8x4_MUL_UN8x4_vv_m4 ( __riscv_vle32_v_u32m4 (pd, vl), __riscv_vnot_v_u32m4 (rvv_combine_mask_alpha_ca_m4 ( __riscv_vle32_v_u32m4 (ps, vl), __riscv_vle32_v_u32m4 (pm, vl), vl), vl), vl), vl); } } static void rvv_combine_add_ca (pixman_implementation_t *__restrict__ imp, pixman_op_t op, uint32_t *__restrict__ dest, const uint32_t *__restrict__ src, const uint32_t *__restrict__ mask, int width) { uint32_t *__restrict__ pd = dest; const uint32_t *__restrict__ ps = src; const uint32_t *__restrict__ pm = mask; RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) { __riscv_vse32 ( pd, rvv_UN8x4_ADD_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (pd, vl), rvv_combine_mask_value_ca_m4 ( __riscv_vle32_v_u32m4 (ps, vl), __riscv_vle32_v_u32m4 (pm, vl), vl), vl), vl); } } static void rvv_composite_src_x888_8888 (pixman_implementation_t *__restrict__ imp, pixman_composite_info_t *__restrict__ info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t *__restrict__ dst_line, *__restrict__ dst; uint32_t *__restrict__ src_line, *__restrict__ src; int32_t dst_stride, src_stride; PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; RVV_FOREACH_2 (width, vl, e32m8, src, dst) { __riscv_vse32 ( dst, __riscv_vor (__riscv_vle32_v_u32m8 (src, vl), 0xff000000, vl), vl); } } } static void rvv_composite_src_8888_8888 (pixman_implementation_t *__restrict__ imp, pixman_composite_info_t *__restrict__ info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t *__restrict__ dst_line, *__restrict__ dst; uint32_t *__restrict__ src_line, *__restrict__ src; int32_t dst_stride, src_stride; PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; RVV_FOREACH_2 (width, vl, e32m8, src, dst) { __riscv_vse32 (dst, __riscv_vle32_v_u32m8 (src, vl), vl); } } } static void rvv_composite_over_x888_8_8888 (pixman_implementation_t *__restrict__ imp, pixman_composite_info_t *__restrict__ info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t *__restrict__ src, *__restrict__ src_line; uint32_t *__restrict__ dst, *__restrict__ dst_line; uint8_t *__restrict__ mask, *__restrict__ mask_line; int32_t src_stride, mask_stride, dst_stride; PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { src = src_line; src_line += src_stride; dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; RVV_FOREACH_3 (width, vl, e32m4, src, mask, dst) { __riscv_vse32 ( dst, rvv_over_m4 ( rvv_in_m4 (__riscv_vor (__riscv_vle32_v_u32m4 (src, vl), 0xff000000, vl), __riscv_vle8_v_u8m1 (mask, vl), vl), __riscv_vle32_v_u32m4 (dst, vl), vl), vl); } } } static void rvv_composite_over_8888_8888 (pixman_implementation_t *imp, 
pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; int dst_stride, src_stride; PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; RVV_FOREACH_2 (width, vl, e32m4, src, dst) { __riscv_vse32 (dst, rvv_over_m4 (__riscv_vle32_v_u32m4 (src, vl), __riscv_vle32_v_u32m4 (dst, vl), vl), vl); } } } static void rvv_composite_over_n_8_0565 (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); uint16_t *__restrict__ dst_line, *__restrict__ dst; uint8_t *__restrict__ mask_line, *__restrict__ mask; int dst_stride, mask_stride; uint32_t src; vuint32m4_t vsrc; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); if (src == 0) return; vsrc = __riscv_vmv_v_x_u32m4 (src, __riscv_vsetvlmax_e32m4 ()); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; RVV_FOREACH_2 (width, vl, e16m2, mask, dst) { __riscv_vse16 ( dst, rvv_convert_8888_to_0565_m2 ( rvv_over_m4 ( rvv_in_m4 (vsrc, __riscv_vle8_v_u8m1 (mask, vl), vl), rvv_convert_0565_to_0888_m4 ( __riscv_vle16_v_u16m2 (dst, vl), vl), vl), vl), vl); } } } static void rvv_composite_over_n_8_8888 (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint8_t *mask_line, *mask; int dst_stride, mask_stride; uint32_t src; vuint32m4_t vsrc; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); vsrc = __riscv_vmv_v_x_u32m4 (src, __riscv_vsetvlmax_e32m4 ()); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; RVV_FOREACH_2 (width, vl, e32m4, mask, dst) { __riscv_vse32 ( dst, rvv_over_m4 ( rvv_in_m4 (vsrc, __riscv_vle8_v_u8m1 (mask, vl), vl), __riscv_vle32_v_u32m4 (dst, vl), vl), vl); } } } static void rvv_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *mask_line, *mask; int dst_stride, mask_stride; uint32_t src; vuint32m4_t vsrc; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); if (src == 0) return; vsrc = __riscv_vmv_v_x_u32m4 (src, __riscv_vsetvlmax_e32m4 ()); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; RVV_FOREACH_2 (width, vl, e32m4, mask, dst) { __riscv_vse32 (dst, rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_vvv_m4 ( __riscv_vle32_v_u32m4 (mask, vl), vsrc, __riscv_vle32_v_u32m4 (dst, vl), vl), vl); } } } static void rvv_composite_over_n_8888_8888_ca (pixman_implementation_t *__restrict__ imp, pixman_composite_info_t *__restrict__ info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t *__restrict__ dst_line, *__restrict__ dst; uint32_t *__restrict__ mask_line, 
*__restrict__ mask; int dst_stride, mask_stride; uint32_t src, srca; vuint32m4_t vsrc; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); if (src == 0) return; srca = src >> 24; vsrc = __riscv_vmv_v_x_u32m4 (src, __riscv_vsetvlmax_e32m4 ()); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; RVV_FOREACH_2 (width, vl, e32m4, mask, dst) { vuint32m4_t m = __riscv_vle32_v_u32m4 (mask, vl); __riscv_vse32 ( dst, rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_vvv_m4 ( __riscv_vle32_v_u32m4 (dst, vl), __riscv_vnot (rvv_UN8x4_MUL_UN8_vx_m4 (m, srca, vl), vl), rvv_UN8x4_MUL_UN8x4_vv_m4 (m, vsrc, vl), vl), vl); } } } static void rvv_composite_over_n_8888_0565_ca (pixman_implementation_t *__restrict__ imp, pixman_composite_info_t *__restrict__ info) { PIXMAN_COMPOSITE_ARGS (info); uint16_t *__restrict__ dst_line, *__restrict__ dst; uint32_t *__restrict__ mask_line, *__restrict__ mask; int dst_stride, mask_stride; uint32_t src, srca; vuint32m4_t vsrc; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; vsrc = __riscv_vmv_v_x_u32m4 (src, __riscv_vsetvlmax_e32m4 ()); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; RVV_FOREACH_2 (width, vl, e32m4, mask, dst) { vuint32m4_t ma = __riscv_vle32_v_u32m4 (mask, vl); __riscv_vse16 ( dst, rvv_convert_8888_to_0565_m2 ( rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_vvv_m4 ( rvv_convert_0565_to_0888_m4 ( __riscv_vle16_v_u16m2 (dst, vl), vl), __riscv_vnot (rvv_UN8x4_MUL_UN8_vx_m4 (ma, srca, vl), vl), rvv_UN8x4_MUL_UN8x4_vv_m4 (ma, vsrc, vl), vl), vl), vl); } } } static void rvv_composite_over_8888_0565 (pixman_implementation_t *__restrict__ imp, pixman_composite_info_t *__restrict__ info) { PIXMAN_COMPOSITE_ARGS (info); uint16_t *__restrict__ dst_line, *__restrict__ dst; uint32_t *__restrict__ src_line, *__restrict__ src; int dst_stride, src_stride; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; RVV_FOREACH_2 (width, vl, e16m2, src, dst) { __riscv_vse16 ( dst, rvv_convert_8888_to_0565_m2 ( rvv_over_m4 (__riscv_vle32_v_u32m4 (src, vl), rvv_convert_0565_to_0888_m4 ( __riscv_vle16_v_u16m2 (dst, vl), vl), vl), vl), vl); } } } static void rvv_composite_add_8_8 (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *src_line, *src; int dst_stride, src_stride; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; RVV_FOREACH_2 (width, vl, e8m8, src, dst) { __riscv_vse8 (dst, rvv_UN8_ADD_UN8_vv (__riscv_vle8_v_u8m8 (src, vl), __riscv_vle8_v_u8m8 (dst, vl), vl), vl); } } } static void rvv_composite_add_0565_0565 (pixman_implementation_t *imp, pixman_composite_info_t *info) { 
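    /*
     * ADD for r5g6b5: each 16-bit pixel is widened to an 8-bit-per-channel
     * 8888 lane, the channels are added with the usual clamping UN8
     * arithmetic, and the sum is packed back to 0565.  A rough scalar sketch
     * of one pixel, using pixman's scalar converters (the clamping add is
     * written informally):
     *
     *     uint32_t s8 = convert_0565_to_8888 (*src);
     *     uint32_t d8 = convert_0565_to_8888 (*dst);
     *     *dst = convert_8888_to_0565 (clamped per-channel sum of s8, d8);
     */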
PIXMAN_COMPOSITE_ARGS (info); uint16_t *dst_line, *dst; uint16_t *src_line, *src; int dst_stride, src_stride; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; RVV_FOREACH_2 (width, vl, e16m2, src, dst) { __riscv_vse16 (dst, rvv_convert_8888_to_0565_m2 ( rvv_UN8x4_ADD_UN8x4_vv_m4 ( rvv_convert_0565_to_8888_m4 ( __riscv_vle16_v_u16m2 (src, vl), vl), rvv_convert_0565_to_8888_m4 ( __riscv_vle16_v_u16m2 (dst, vl), vl), vl), vl), vl); } } } static void rvv_composite_add_8888_8888 (pixman_implementation_t *__restrict__ imp, pixman_composite_info_t *__restrict__ info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t *__restrict__ dst_line, *__restrict__ dst; uint32_t *__restrict__ src_line, *__restrict__ src; int dst_stride, src_stride; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; RVV_FOREACH_2 (width, vl, e32m4, src, dst) { __riscv_vse32 ( dst, rvv_UN8x4_ADD_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (src, vl), __riscv_vle32_v_u32m4 (dst, vl), vl), vl); } } } static void rvv_composite_add_n_8_8 (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *mask_line, *mask; int dst_stride, mask_stride; uint32_t src; uint8_t sa; PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); sa = (src >> 24); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; RVV_FOREACH_2 (width, vl, e8m4, mask, dst) { __riscv_vse8 ( dst, rvv_UN8_ADD_UN8_vv (rvv_UN8_MUL_UN8_vx_m4 ( __riscv_vle8_v_u8m4 (mask, vl), sa, vl), __riscv_vle8_v_u8m4 (dst, vl), vl), vl); } } } static void rvv_composite_src_memcpy (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; uint32_t n_bytes = width * bpp; int dst_stride, src_stride; uint8_t *dst; uint8_t *src; src_stride = src_image->bits.rowstride * 4; dst_stride = dest_image->bits.rowstride * 4; src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp; while (height--) { memcpy (dst, src, n_bytes); dst += dst_stride; src += src_stride; } } static void rvv_composite_in_n_8_8 (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint8_t *dst_line, *dst; uint8_t *mask_line, *mask; int dst_stride, mask_stride; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); if (srca == 0xff) { while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; RVV_FOREACH_2 (width, vl, e8m4, mask, dst) { __riscv_vse8 ( dst, rvv_UN8_MUL_UN8_vv_m4 (__riscv_vle8_v_u8m4 (mask, vl), 
__riscv_vle8_v_u8m4 (dst, vl), vl), vl); } } } else { while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; RVV_FOREACH_2 (width, vl, e8m4, mask, dst) { __riscv_vse8 (dst, rvv_UN8_MUL_UN8_vv_m4 ( rvv_UN8_MUL_UN8_vx_m4 ( __riscv_vle8_v_u8m4 (mask, vl), srca, vl), __riscv_vle8_v_u8m4 (dst, vl), vl), vl); } } } } static void rvv_composite_in_8_8 (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *src_line, *src; int dst_stride, src_stride; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; RVV_FOREACH_2 (width, vl, e8m4, src, dst) { __riscv_vse8 (dst, rvv_UN8_MUL_UN8_vv_m4 (__riscv_vle8_v_u8m4 (src, vl), __riscv_vle8_v_u8m4 (dst, vl), vl), vl); } } } #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs)) /* * There is some potential for hand vectorization, but for now let's leave it * autovectorized. */ static force_inline void pixman_fill1_line (uint32_t *dst, int offs, int width, int v) { if (offs) { int leading_pixels = 32 - offs; if (leading_pixels >= width) { if (v) *dst |= A1_FILL_MASK (width, offs); else *dst &= ~A1_FILL_MASK (width, offs); return; } else { if (v) *dst++ |= A1_FILL_MASK (leading_pixels, offs); else *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); width -= leading_pixels; } } while (width >= 32) { if (v) *dst++ = 0xFFFFFFFF; else *dst++ = 0; width -= 32; } if (width > 0) { if (v) *dst |= A1_FILL_MASK (width, 0); else *dst &= ~A1_FILL_MASK (width, 0); } } static void rvv_fill1 (uint32_t *bits, int stride, int x, int y, int width, int height, uint32_t filler) { uint32_t *dst = bits + y * stride + (x >> 5); int offs = x & 31; while (height--) { pixman_fill1_line (dst, offs, width, (filler & 1)); dst += stride; } } #define RVV_FILL(dtypew) \ static void rvv_fill_u##dtypew (uint32_t *__restrict__ bits, int stride, \ int x, int y, int width, int height, \ uint32_t filler) \ { \ uint##dtypew##_t *__restrict__ bitsw = (uint##dtypew##_t *)bits; \ int32_t vstride = stride * (32 / dtypew); \ vuint##dtypew##m8_t vfiller = __riscv_vmv_v_x_u##dtypew##m8 ( \ (uint##dtypew##_t)filler, __riscv_vsetvlmax_e##dtypew##m8 ()); \ \ bitsw += y * vstride + x; \ while (height--) \ { \ uint##dtypew##_t *__restrict__ d = bitsw; \ \ RVV_FOREACH_1 (width, vl, e##dtypew##m8, d) \ { \ __riscv_vse##dtypew (d, vfiller, vl); \ } \ \ bitsw += vstride; \ } \ } RVV_FILL (8); RVV_FILL (16); RVV_FILL (32); static pixman_bool_t rvv_fill (pixman_implementation_t *__restrict__ imp, uint32_t *__restrict__ bits, int stride, int bpp, int x, int y, int width, int height, uint32_t filler) { switch (bpp) { case 1: rvv_fill1 (bits, stride, x, y, width, height, filler); break; case 8: rvv_fill_u8 (bits, stride, x, y, width, height, filler); break; case 16: rvv_fill_u16 (bits, stride, x, y, width, height, filler); break; case 32: rvv_fill_u32 (bits, stride, x, y, width, height, filler); break; default: return FALSE; } return TRUE; } static void rvv_composite_solid_fill (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t src; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); if (dest_image->bits.format == PIXMAN_a1) { src = src >> 31; } else if (dest_image->bits.format == PIXMAN_a8) { src = src >> 24; } else 
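    /* 16 bpp (r5g6b5 / b5g6r5) destinations: repack the solid to the packed
     * 5-6-5 layout before the generic fill; _pixman_image_get_solid should
     * already have matched the channel order to the destination format. */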
if (dest_image->bits.format == PIXMAN_r5g6b5 || dest_image->bits.format == PIXMAN_b5g6r5) { src = convert_8888_to_0565 (src); } rvv_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride, PIXMAN_FORMAT_BPP (dest_image->bits.format), dest_x, dest_y, width, height, src); } #define RVV_BLT(dtypew) \ static void rvv_blt_u##dtypew ( \ uint32_t *__restrict__ src_bits, uint32_t *__restrict__ dst_bits, \ int src_stride, int dst_stride, int src_x, int src_y, int dest_x, \ int dest_y, int width, int height) \ { \ uint##dtypew##_t *src_w = (uint##dtypew##_t *)src_bits; \ uint##dtypew##_t *dst_w = (uint##dtypew##_t *)dst_bits; \ \ src_stride = src_stride * (32 / dtypew); \ dst_stride = dst_stride * (32 / dtypew); \ \ src_w += src_stride * src_y + src_x; \ dst_w += dst_stride * dest_y + dest_x; \ \ while (height--) \ { \ uint##dtypew##_t *__restrict__ pd = dst_w; \ uint##dtypew##_t *__restrict__ ps = src_w; \ \ RVV_FOREACH_2 (width, vl, e##dtypew##m8, ps, pd) \ { \ __riscv_vse##dtypew ( \ pd, __riscv_vle##dtypew##_v_u##dtypew##m8 (ps, vl), vl); \ } \ \ dst_w += dst_stride; \ src_w += src_stride; \ } \ } RVV_BLT (8); RVV_BLT (16); RVV_BLT (32); static pixman_bool_t rvv_blt (pixman_implementation_t *__restrict__ imp, uint32_t *__restrict__ src_bits, uint32_t *__restrict__ dst_bits, int src_stride, int dst_stride, int src_bpp, int dst_bpp, int src_x, int src_y, int dest_x, int dest_y, int width, int height) { if (src_bpp != dst_bpp) return FALSE; switch (src_bpp) { case 8: rvv_blt_u8 (src_bits, dst_bits, src_stride, dst_stride, src_x, src_y, dest_x, dest_y, width, height); break; case 16: rvv_blt_u16 (src_bits, dst_bits, src_stride, dst_stride, src_x, src_y, dest_x, dest_y, width, height); break; case 32: rvv_blt_u32 (src_bits, dst_bits, src_stride, dst_stride, src_x, src_y, dest_x, dest_y, width, height); break; default: return FALSE; } return TRUE; } // clang-format off static const pixman_fast_path_t rvv_fast_paths[] = { PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, rvv_composite_over_n_8_0565), PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, rvv_composite_over_n_8_0565), // PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, rvv_composite_over_n_8_0888), // PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, rvv_composite_over_n_8_0888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, rvv_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, rvv_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, rvv_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, rvv_composite_over_n_8_8888), // PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, rvv_composite_over_n_1_8888), // PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, rvv_composite_over_n_1_8888), // PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, rvv_composite_over_n_1_8888), // PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, rvv_composite_over_n_1_8888), // PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, rvv_composite_over_n_1_0565), // PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, rvv_composite_over_n_1_0565), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, rvv_composite_over_n_8888_8888_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, rvv_composite_over_n_8888_8888_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, rvv_composite_over_n_8888_0565_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, rvv_composite_over_n_8888_8888_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, rvv_composite_over_n_8888_8888_ca), 
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, rvv_composite_over_n_8888_0565_ca), PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, rvv_composite_over_x888_8_8888), PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, rvv_composite_over_x888_8_8888), PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, rvv_composite_over_x888_8_8888), PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, rvv_composite_over_x888_8_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, rvv_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, rvv_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, rvv_composite_over_8888_0565), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, rvv_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, rvv_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, rvv_composite_over_8888_0565), PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, rvv_composite_add_0565_0565), PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, rvv_composite_add_0565_0565), PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, rvv_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, rvv_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, rvv_composite_add_8_8), // PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1), PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, rvv_composite_add_n_8888_8888_ca), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, rvv_composite_add_n_8_8), PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, rvv_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, rvv_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, rvv_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, rvv_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, rvv_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, rvv_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, rvv_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, rvv_composite_src_x888_8888), PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, rvv_composite_src_x888_8888), PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, rvv_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, rvv_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, rvv_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, rvv_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, rvv_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, rvv_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, rvv_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, rvv_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, rvv_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, rvv_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, rvv_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, rvv_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, rvv_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, rvv_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, rvv_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, rvv_composite_src_memcpy), PIXMAN_STD_FAST_PATH (IN, 
a8, null, a8, rvv_composite_in_8_8), PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, rvv_composite_in_n_8_8), PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, rvv_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, rvv_composite_src_8888_8888), {PIXMAN_OP_NONE}, }; pixman_implementation_t * _pixman_implementation_create_rvv (pixman_implementation_t *fallback) { pixman_implementation_t *imp = _pixman_implementation_create ( fallback, rvv_fast_paths); // clang-format off imp->combine_float[PIXMAN_OP_CLEAR] = rvv_combine_clear_u_float; imp->combine_float[PIXMAN_OP_SRC] = rvv_combine_src_u_float; imp->combine_float[PIXMAN_OP_DST] = rvv_combine_dst_u_float; imp->combine_float[PIXMAN_OP_OVER] = rvv_combine_over_u_float; imp->combine_float[PIXMAN_OP_OVER_REVERSE] = rvv_combine_over_reverse_u_float; imp->combine_float[PIXMAN_OP_IN] = rvv_combine_in_u_float; imp->combine_float[PIXMAN_OP_IN_REVERSE] = rvv_combine_in_reverse_u_float; imp->combine_float[PIXMAN_OP_OUT] = rvv_combine_out_u_float; imp->combine_float[PIXMAN_OP_OUT_REVERSE] = rvv_combine_out_reverse_u_float; imp->combine_float[PIXMAN_OP_ATOP] = rvv_combine_atop_u_float; imp->combine_float[PIXMAN_OP_ATOP_REVERSE] = rvv_combine_atop_reverse_u_float; imp->combine_float[PIXMAN_OP_XOR] = rvv_combine_xor_u_float; imp->combine_float[PIXMAN_OP_ADD] = rvv_combine_add_u_float; imp->combine_float[PIXMAN_OP_SATURATE] = rvv_combine_saturate_u_float; /* Disjoint, unified */ imp->combine_float[PIXMAN_OP_DISJOINT_CLEAR] = rvv_combine_disjoint_clear_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_SRC] = rvv_combine_disjoint_src_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_DST] = rvv_combine_disjoint_dst_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_OVER] = rvv_combine_disjoint_over_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_OVER_REVERSE] = rvv_combine_disjoint_over_reverse_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_IN] = rvv_combine_disjoint_in_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_IN_REVERSE] = rvv_combine_disjoint_in_reverse_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_OUT] = rvv_combine_disjoint_out_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_OUT_REVERSE] = rvv_combine_disjoint_out_reverse_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_ATOP] = rvv_combine_disjoint_atop_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = rvv_combine_disjoint_atop_reverse_u_float; imp->combine_float[PIXMAN_OP_DISJOINT_XOR] = rvv_combine_disjoint_xor_u_float; /* Conjoint, unified */ imp->combine_float[PIXMAN_OP_CONJOINT_CLEAR] = rvv_combine_conjoint_clear_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_SRC] = rvv_combine_conjoint_src_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_DST] = rvv_combine_conjoint_dst_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_OVER] = rvv_combine_conjoint_over_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_OVER_REVERSE] = rvv_combine_conjoint_over_reverse_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_IN] = rvv_combine_conjoint_in_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_IN_REVERSE] = rvv_combine_conjoint_in_reverse_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_OUT] = rvv_combine_conjoint_out_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_OUT_REVERSE] = rvv_combine_conjoint_out_reverse_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_ATOP] = rvv_combine_conjoint_atop_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = rvv_combine_conjoint_atop_reverse_u_float; imp->combine_float[PIXMAN_OP_CONJOINT_XOR] = rvv_combine_conjoint_xor_u_float; /* PDF operators, unified */ 
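    /*
     * Note: the separable PDF blend modes below operate on premultiplied
     * float pixels; as in pixman's generic float path, each combiner is
     * expected to produce s * (1 - da) + d * (1 - sa) plus the operator's
     * blend term, with result alpha sa + da - sa * da.
     */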
imp->combine_float[PIXMAN_OP_MULTIPLY] = rvv_combine_multiply_u_float; imp->combine_float[PIXMAN_OP_SCREEN] = rvv_combine_screen_u_float; imp->combine_float[PIXMAN_OP_OVERLAY] = rvv_combine_overlay_u_float; imp->combine_float[PIXMAN_OP_DARKEN] = rvv_combine_darken_u_float; imp->combine_float[PIXMAN_OP_LIGHTEN] = rvv_combine_lighten_u_float; imp->combine_float[PIXMAN_OP_HARD_LIGHT] = rvv_combine_hard_light_u_float; imp->combine_float[PIXMAN_OP_SOFT_LIGHT] = rvv_combine_soft_light_u_float; imp->combine_float[PIXMAN_OP_DIFFERENCE] = rvv_combine_difference_u_float; imp->combine_float[PIXMAN_OP_EXCLUSION] = rvv_combine_exclusion_u_float; imp->combine_float[PIXMAN_OP_COLOR_DODGE] = rvv_combine_color_dodge_u_float; imp->combine_float[PIXMAN_OP_COLOR_BURN] = rvv_combine_color_burn_u_float; /* Component alpha combiners */ imp->combine_float_ca[PIXMAN_OP_CLEAR] = rvv_combine_clear_ca_float; imp->combine_float_ca[PIXMAN_OP_SRC] = rvv_combine_src_ca_float; imp->combine_float_ca[PIXMAN_OP_DST] = rvv_combine_dst_ca_float; imp->combine_float_ca[PIXMAN_OP_OVER] = rvv_combine_over_ca_float; imp->combine_float_ca[PIXMAN_OP_OVER_REVERSE] = rvv_combine_over_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_IN] = rvv_combine_in_ca_float; imp->combine_float_ca[PIXMAN_OP_IN_REVERSE] = rvv_combine_in_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_OUT] = rvv_combine_out_ca_float; imp->combine_float_ca[PIXMAN_OP_OUT_REVERSE] = rvv_combine_out_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_ATOP] = rvv_combine_atop_ca_float; imp->combine_float_ca[PIXMAN_OP_ATOP_REVERSE] = rvv_combine_atop_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_XOR] = rvv_combine_xor_ca_float; imp->combine_float_ca[PIXMAN_OP_ADD] = rvv_combine_add_ca_float; imp->combine_float_ca[PIXMAN_OP_SATURATE] = rvv_combine_saturate_ca_float; /* Disjoint CA */ imp->combine_float_ca[PIXMAN_OP_DISJOINT_CLEAR] = rvv_combine_disjoint_clear_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_SRC] = rvv_combine_disjoint_src_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_DST] = rvv_combine_disjoint_dst_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER] = rvv_combine_disjoint_over_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = rvv_combine_disjoint_over_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN] = rvv_combine_disjoint_in_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = rvv_combine_disjoint_in_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT] = rvv_combine_disjoint_out_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = rvv_combine_disjoint_out_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP] = rvv_combine_disjoint_atop_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = rvv_combine_disjoint_atop_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_DISJOINT_XOR] = rvv_combine_disjoint_xor_ca_float; /* Conjoint CA */ imp->combine_float_ca[PIXMAN_OP_CONJOINT_CLEAR] = rvv_combine_conjoint_clear_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_SRC] = rvv_combine_conjoint_src_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_DST] = rvv_combine_conjoint_dst_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER] = rvv_combine_conjoint_over_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = rvv_combine_conjoint_over_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN] = rvv_combine_conjoint_in_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = rvv_combine_conjoint_in_reverse_ca_float; 
imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT] = rvv_combine_conjoint_out_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = rvv_combine_conjoint_out_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP] = rvv_combine_conjoint_atop_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = rvv_combine_conjoint_atop_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_XOR] = rvv_combine_conjoint_xor_ca_float; /* PDF operators CA */ imp->combine_float_ca[PIXMAN_OP_MULTIPLY] = rvv_combine_multiply_ca_float; imp->combine_float_ca[PIXMAN_OP_SCREEN] = rvv_combine_screen_ca_float; imp->combine_float_ca[PIXMAN_OP_OVERLAY] = rvv_combine_overlay_ca_float; imp->combine_float_ca[PIXMAN_OP_DARKEN] = rvv_combine_darken_ca_float; imp->combine_float_ca[PIXMAN_OP_LIGHTEN] = rvv_combine_lighten_ca_float; imp->combine_float_ca[PIXMAN_OP_COLOR_DODGE] = rvv_combine_color_dodge_ca_float; imp->combine_float_ca[PIXMAN_OP_COLOR_BURN] = rvv_combine_color_burn_ca_float; imp->combine_float_ca[PIXMAN_OP_HARD_LIGHT] = rvv_combine_hard_light_ca_float; imp->combine_float_ca[PIXMAN_OP_SOFT_LIGHT] = rvv_combine_soft_light_ca_float; imp->combine_float_ca[PIXMAN_OP_DIFFERENCE] = rvv_combine_difference_ca_float; imp->combine_float_ca[PIXMAN_OP_EXCLUSION] = rvv_combine_exclusion_ca_float; /* It is not clear that these make sense, so make them noops for now */ imp->combine_float_ca[PIXMAN_OP_HSL_HUE] = rvv_combine_dst_u_float; imp->combine_float_ca[PIXMAN_OP_HSL_SATURATION] = rvv_combine_dst_u_float; imp->combine_float_ca[PIXMAN_OP_HSL_COLOR] = rvv_combine_dst_u_float; imp->combine_float_ca[PIXMAN_OP_HSL_LUMINOSITY] = rvv_combine_dst_u_float; /* Set up function pointers */ imp->combine_32[PIXMAN_OP_CLEAR] = rvv_combine_clear; imp->combine_32[PIXMAN_OP_SRC] = rvv_combine_src_u; imp->combine_32[PIXMAN_OP_OVER] = rvv_combine_over_u; imp->combine_32[PIXMAN_OP_OVER_REVERSE] = rvv_combine_over_reverse_u; imp->combine_32[PIXMAN_OP_IN] = rvv_combine_in_u; imp->combine_32[PIXMAN_OP_IN_REVERSE] = rvv_combine_in_reverse_u; imp->combine_32[PIXMAN_OP_OUT] = rvv_combine_out_u; imp->combine_32[PIXMAN_OP_OUT_REVERSE] = rvv_combine_out_reverse_u; imp->combine_32[PIXMAN_OP_ATOP] = rvv_combine_atop_u; imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = rvv_combine_atop_reverse_u; imp->combine_32[PIXMAN_OP_XOR] = rvv_combine_xor_u; imp->combine_32[PIXMAN_OP_ADD] = rvv_combine_add_u; imp->combine_32[PIXMAN_OP_MULTIPLY] = rvv_combine_multiply_u; imp->combine_32[PIXMAN_OP_SCREEN] = rvv_combine_screen_u; imp->combine_32[PIXMAN_OP_OVERLAY] = rvv_combine_overlay_u; imp->combine_32[PIXMAN_OP_DARKEN] = rvv_combine_darken_u; imp->combine_32[PIXMAN_OP_LIGHTEN] = rvv_combine_lighten_u; imp->combine_32[PIXMAN_OP_HARD_LIGHT] = rvv_combine_hard_light_u; imp->combine_32[PIXMAN_OP_DIFFERENCE] = rvv_combine_difference_u; imp->combine_32[PIXMAN_OP_EXCLUSION] = rvv_combine_exclusion_u; imp->combine_32_ca[PIXMAN_OP_CLEAR] = rvv_combine_clear; imp->combine_32_ca[PIXMAN_OP_SRC] = rvv_combine_src_ca; imp->combine_32_ca[PIXMAN_OP_OVER] = rvv_combine_over_ca; imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = rvv_combine_over_reverse_ca; imp->combine_32_ca[PIXMAN_OP_IN] = rvv_combine_in_ca; imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = rvv_combine_in_reverse_ca; imp->combine_32_ca[PIXMAN_OP_OUT] = rvv_combine_out_ca; imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = rvv_combine_out_reverse_ca; imp->combine_32_ca[PIXMAN_OP_ATOP] = rvv_combine_atop_ca; imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = rvv_combine_atop_reverse_ca; imp->combine_32_ca[PIXMAN_OP_XOR] = 
rvv_combine_xor_ca; imp->combine_32_ca[PIXMAN_OP_ADD] = rvv_combine_add_ca; imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = rvv_combine_multiply_ca; imp->combine_32_ca[PIXMAN_OP_SCREEN] = rvv_combine_screen_ca; imp->combine_32_ca[PIXMAN_OP_OVERLAY] = rvv_combine_overlay_ca; imp->combine_32_ca[PIXMAN_OP_DARKEN] = rvv_combine_darken_ca; imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = rvv_combine_lighten_ca; imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = rvv_combine_hard_light_ca; imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = rvv_combine_difference_ca; imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = rvv_combine_exclusion_ca; imp->fill = rvv_fill; imp->blt = rvv_blt; return imp; } // clang-format on
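/*
 * Usage sketch (informational, not part of this translation unit): the RVV
 * implementation is chained in front of a fallback by the platform setup
 * code, roughly
 *
 *     imp = _pixman_implementation_create_rvv (imp);
 *
 * and should only be selected after a runtime check that the CPU supports
 * the V extension; the detection helper lives in pixman's platform code and
 * is not defined here.
 */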